From db390c21b493feb1baa71b5948ddf3097608cb3c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 15 Jun 2020 10:11:32 +0200
Subject: [PATCH 01/17] ENH add a parameter pos_label in roc_auc_score

---
 sklearn/metrics/_ranking.py           | 16 +++++-----
 sklearn/metrics/tests/test_ranking.py | 43 ++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 6aab05a71707d..db8d7a8080efd 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -217,14 +217,16 @@ def _binary_uninterpolated_average_precision(
                                  average, sample_weight=sample_weight)
 
 
-def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
+def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
+                          pos_label=None):
     """Binary roc auc score"""
     if len(np.unique(y_true)) != 2:
         raise ValueError("Only one class present in y_true. ROC AUC score "
                          "is not defined in that case.")
 
-    fpr, tpr, _ = roc_curve(y_true, y_score,
-                            sample_weight=sample_weight)
+    fpr, tpr, _ = roc_curve(
+        y_true, y_score, sample_weight=sample_weight, pos_label=pos_label,
+    )
     if max_fpr is None or max_fpr == 1:
         return auc(fpr, tpr)
     if max_fpr <= 0 or max_fpr > 1:
@@ -247,7 +249,8 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
 
 @_deprecate_positional_args
 def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
-                  max_fpr=None, multi_class="raise", labels=None):
+                  max_fpr=None, multi_class="raise", labels=None,
+                  pos_label=None):
     """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
     from prediction scores.
 
@@ -385,10 +388,9 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         return _multiclass_roc_auc_score(y_true, y_score, labels,
                                          multi_class, average, sample_weight)
     elif y_type == "binary":
-        labels = np.unique(y_true)
-        y_true = label_binarize(y_true, classes=labels)[:, 0]
         return _average_binary_score(partial(_binary_roc_auc_score,
-                                             max_fpr=max_fpr),
+                                             max_fpr=max_fpr,
+                                             pos_label=pos_label),
                                      y_true, y_score, average,
                                      sample_weight=sample_weight)
     else:  # multilabel-indicator
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index a66ff9525c28c..e1c8053a59842 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -7,9 +7,13 @@
 from sklearn import datasets
 from sklearn import svm
 
-from sklearn.utils.extmath import softmax
 from sklearn.datasets import make_multilabel_classification
+from sklearn.datasets import load_breast_cancer
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
 from sklearn.random_projection import _sparse_random_matrix
+from sklearn.utils import shuffle
+from sklearn.utils.extmath import softmax
 from sklearn.utils.validation import check_array, check_consistent_length
 from sklearn.utils.validation import check_random_state
 
@@ -1469,3 +1473,40 @@ def test_partial_roc_auc_score():
     assert_almost_equal(
         roc_auc_score(y_true, y_pred, max_fpr=max_fpr),
         _partial_roc_auc_score(y_true, y_pred, max_fpr))
+
+
+@pytest.mark.parametrize(
+    "decision_method", ["predict_proba", "decision_function"]
+)
+def test_roc_auc_score_pos_label(decision_method):
+    X, y = load_breast_cancer(return_X_y=True)
+    # create a highly imbalanced version of the dataset
+    idx_positive = np.flatnonzero(y == 1)
+    idx_negative = np.flatnonzero(y == 0)
+    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
+    X, y = X[idx_selected], y[idx_selected]
+    X, y = shuffle(X, y, random_state=42)
+    # only use 2 features to make the problem even harder
+    X = X[:, :2]
+    y = np.array(
+        ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
+    )
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, stratify=y, random_state=0,
+    )
+
+    classifier = LogisticRegression()
+    classifier.fit(X_train, y_train)
+
+    # sanity check to be sure the positive class is classes_[0] and that we
+    # would be betrayed by the class imbalance if we relied on defaults
+    assert classifier.classes_.tolist() == ["cancer", "not cancer"]
+    pos_label = "cancer"
+
+    y_pred = getattr(classifier, decision_method)(X_test)
+    y_pred = y_pred[:, 0] if y_pred.ndim == 2 else -y_pred
+
+    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=pos_label)
+    roc_auc = roc_auc_score(y_test, y_pred, pos_label=pos_label)
+
+    assert roc_auc == pytest.approx(np.trapz(tpr, fpr))
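
A minimal sketch of what this first patch enables, assuming it is applied
(the labels and scores below are illustrative, not taken from the patch):

    import numpy as np
    from sklearn.metrics import roc_auc_score

    # String labels are neither {0, 1} nor {-1, 1}, so the positive class
    # has to be named explicitly through the new `pos_label` parameter.
    y_true = np.array(["cancer", "not cancer", "cancer", "not cancer"],
                      dtype=object)
    # Higher score means "more likely cancer".
    y_score = np.array([0.9, 0.2, 0.8, 0.4])

    # `pos_label` is forwarded to `roc_curve` inside `_binary_roc_auc_score`;
    # the scores separate the classes perfectly, so this prints 1.0.
    print(roc_auc_score(y_true, y_score, pos_label="cancer"))
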
From 0e2937ba73bdc03615810cab62d0bcbd99c40fbf Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 15 Jun 2020 10:18:56 +0200
Subject: [PATCH 02/17] add documentation

---
 doc/whats_new/v0.24.rst     | 5 ++++-
 sklearn/metrics/_ranking.py | 5 +++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index ea27d7579ae4d..fbf2717ffbbf6 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -99,9 +99,12 @@ Changelog
   :pr:`17309` by :user:`Swier Heeres`
 
 - |Enhancement| Add `sample_weight` parameter to
-  :class:`metrics.median_absolute_error`. :pr:`17225` by
+  :func:`metrics.median_absolute_error`. :pr:`17225` by
   :user:`Lucy Liu`.
 
+- |Enhancement| Add `pos_label` parameter to :func:`roc_auc_score`.
+  :pr:`17594` by :user:`Guillaume Lemaitre`.
+
 :mod:`sklearn.model_selection`
 ..............................
 
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index db8d7a8080efd..cbb073db3c1cc 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -329,6 +329,11 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         If ``None``, the numerical or lexicographical order of the labels in
         ``y_true`` is used.
 
+    pos_label : int or str, default=None
+        The label of the positive class in the binary case. When
+        `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is
+        set to 1; otherwise an error is raised.
+
     Returns
     -------
     auc : float
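
The documented default can be exercised directly. A sketch assuming the two
patches above are applied (the exact error message comes from `roc_curve`
and may differ):

    import numpy as np
    from sklearn.metrics import roc_auc_score

    y_score = np.array([0.1, 0.8, 0.3, 0.7])

    # {0, 1} labels: `pos_label=None` is resolved to 1, as documented.
    roc_auc_score(np.array([0, 1, 0, 1]), y_score)

    # Any other label set is ambiguous and raises without `pos_label`.
    y_str = np.array(["neg", "pos", "neg", "pos"], dtype=object)
    try:
        roc_auc_score(y_str, y_score)
    except ValueError as exc:
        print(exc)

    # Naming the positive class removes the ambiguity.
    roc_auc_score(y_str, y_score, pos_label="pos")
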
From 7f4fa4562d6af1fd3d4039810f8aa5073017011b Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 15 Jun 2020 11:18:29 +0200
Subject: [PATCH 03/17] TST pass pos_label with str in common test

---
 sklearn/metrics/tests/test_common.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py
index 7301d21a35f39..e1cfdd0620a36 100644
--- a/sklearn/metrics/tests/test_common.py
+++ b/sklearn/metrics/tests/test_common.py
@@ -319,6 +319,17 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
 
 # Metrics with a "pos_label" argument
 METRICS_WITH_POS_LABEL = {
     "roc_curve",
+
+    "roc_auc_score",
+    "weighted_roc_auc",
+    "samples_roc_auc",
+    "micro_roc_auc",
+    "ovr_roc_auc",
+    "weighted_ovr_roc_auc",
+    "ovo_roc_auc",
+    "weighted_ovo_roc_auc",
+    "partial_roc_auc",
+
     "precision_recall_curve",
     "brier_score_loss",

From 1852d50b0f9614743ef4d43234f2fbb40dc35a0a Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 15 Jun 2020 10:11:32 +0200
Subject: [PATCH 04/17] ENH add a parameter pos_label in roc_auc_score

---
 sklearn/metrics/_ranking.py           | 16 +++++-----
 sklearn/metrics/tests/test_ranking.py | 43 ++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index e07f61a92d478..5737f257912dc 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -218,14 +218,16 @@ def _binary_uninterpolated_average_precision(
                                  average, sample_weight=sample_weight)
 
 
-def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
+def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
+                          pos_label=None):
     """Binary roc auc score"""
     if len(np.unique(y_true)) != 2:
         raise ValueError("Only one class present in y_true. ROC AUC score "
                          "is not defined in that case.")
 
-    fpr, tpr, _ = roc_curve(y_true, y_score,
-                            sample_weight=sample_weight)
+    fpr, tpr, _ = roc_curve(
+        y_true, y_score, sample_weight=sample_weight, pos_label=pos_label,
+    )
     if max_fpr is None or max_fpr == 1:
         return auc(fpr, tpr)
     if max_fpr <= 0 or max_fpr > 1:
@@ -248,7 +250,8 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
 
 @_deprecate_positional_args
 def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
-                  max_fpr=None, multi_class="raise", labels=None):
+                  max_fpr=None, multi_class="raise", labels=None,
+                  pos_label=None):
     """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
     from prediction scores.
 
@@ -388,10 +391,9 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         return _multiclass_roc_auc_score(y_true, y_score, labels,
                                          multi_class, average, sample_weight)
     elif y_type == "binary":
-        labels = np.unique(y_true)
-        y_true = label_binarize(y_true, classes=labels)[:, 0]
         return _average_binary_score(partial(_binary_roc_auc_score,
-                                             max_fpr=max_fpr),
+                                             max_fpr=max_fpr,
+                                             pos_label=pos_label),
                                      y_true, y_score, average,
                                      sample_weight=sample_weight)
     else:  # multilabel-indicator
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index 3daafa8d196d3..dd771570e8481 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -7,9 +7,13 @@
 from sklearn import datasets
 from sklearn import svm
 
-from sklearn.utils.extmath import softmax
 from sklearn.datasets import make_multilabel_classification
+from sklearn.datasets import load_breast_cancer
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
 from sklearn.random_projection import _sparse_random_matrix
+from sklearn.utils import shuffle
+from sklearn.utils.extmath import softmax
 from sklearn.utils.validation import check_array, check_consistent_length
 from sklearn.utils.validation import check_random_state
 
@@ -1469,3 +1473,40 @@ def test_partial_roc_auc_score():
     assert_almost_equal(
         roc_auc_score(y_true, y_pred, max_fpr=max_fpr),
         _partial_roc_auc_score(y_true, y_pred, max_fpr))
+
+
+@pytest.mark.parametrize(
+    "decision_method", ["predict_proba", "decision_function"]
+)
+def test_roc_auc_score_pos_label(decision_method):
+    X, y = load_breast_cancer(return_X_y=True)
+    # create a highly imbalanced version of the dataset
+    idx_positive = np.flatnonzero(y == 1)
+    idx_negative = np.flatnonzero(y == 0)
+    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
+    X, y = X[idx_selected], y[idx_selected]
+    X, y = shuffle(X, y, random_state=42)
+    # only use 2 features to make the problem even harder
+    X = X[:, :2]
+    y = np.array(
+        ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
+    )
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, stratify=y, random_state=0,
+    )
+
+    classifier = LogisticRegression()
+    classifier.fit(X_train, y_train)
+
+    # sanity check to be sure the positive class is classes_[0] and that we
+    # would be betrayed by the class imbalance if we relied on defaults
+    assert classifier.classes_.tolist() == ["cancer", "not cancer"]
+    pos_label = "cancer"
+
+    y_pred = getattr(classifier, decision_method)(X_test)
+    y_pred = y_pred[:, 0] if y_pred.ndim == 2 else -y_pred
+
+    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=pos_label)
+    roc_auc = roc_auc_score(y_test, y_pred, pos_label=pos_label)
+
+    assert roc_auc == pytest.approx(np.trapz(tpr, fpr))
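
Registering the ROC-AUC variants in `METRICS_WITH_POS_LABEL` opts them into
the shared `pos_label` checks. A hypothetical illustration (not the actual
common test) of the invariance those checks target: scoring string labels
with an explicit `pos_label` must match scoring the `{0, 1}` encoding of the
same target:

    import numpy as np
    from sklearn.metrics import roc_auc_score

    rng = np.random.RandomState(0)
    y = rng.randint(0, 2, size=50)
    scores = rng.rand(50)
    y_str = np.where(y == 1, "pos", "neg").astype(object)

    assert roc_auc_score(y_str, scores, pos_label="pos") == \
        roc_auc_score(y, scores)
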
From 62efb2d216c6272f0b9fc4f8ca3b8365f6ee2e4c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 15 Jun 2020 10:18:56 +0200
Subject: [PATCH 05/17] add documentation

---
 doc/whats_new/v0.24.rst     | 3 +++
 sklearn/metrics/_ranking.py | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index da366c913f500..42a9382e2d7b7 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -240,6 +240,9 @@ Changelog
   ``metric='seuclidean'`` and ``X`` is not type ``np.float64``.
   :pr:`15730` by :user:`Forrest Koch`.
 
+- |Enhancement| Add `pos_label` parameter to :func:`roc_auc_score`.
+  :pr:`17594` by :user:`Guillaume Lemaitre`.
+
 :mod:`sklearn.model_selection`
 ..............................
 
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 5737f257912dc..4682fc4a6854c 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -330,6 +330,11 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         If ``None``, the numerical or lexicographical order of the labels in
         ``y_true`` is used.
 
+    pos_label : int or str, default=None
+        The label of the positive class in the binary case. When
+        `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is
+        set to 1; otherwise an error is raised.
+
     Returns
     -------
     auc : float

From 2746252f92873994a92a79059de125581d3b8727 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Mon, 15 Jun 2020 17:20:20 +0200
Subject: [PATCH 06/17] add versionadded

---
 sklearn/metrics/_ranking.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 4682fc4a6854c..5f738f01268d2 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -335,6 +335,8 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is
         set to 1; otherwise an error is raised.
 
+        .. versionadded:: 0.24
+
     Returns
     -------
     auc : float

From fca877cc6e2665e6182f4ec6a2af7d5f56cb8ba3 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Tue, 7 Jul 2020 08:43:45 +0200
Subject: [PATCH 07/17] add test with grid-search

---
 sklearn/metrics/tests/test_ranking.py | 28 +++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index dd771570e8481..fba93036e3bd3 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -11,6 +11,7 @@
 from sklearn.datasets import load_breast_cancer
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from sklearn.model_selection import GridSearchCV
 from sklearn.random_projection import _sparse_random_matrix
 from sklearn.utils import shuffle
 from sklearn.utils.extmath import softmax
@@ -1510,3 +1511,30 @@ def test_roc_auc_score_pos_label(decision_method):
     roc_auc = roc_auc_score(y_test, y_pred, pos_label=pos_label)
 
     assert roc_auc == pytest.approx(np.trapz(tpr, fpr))
+
+
+@pytest.mark.parametrize(
+    "decision_method", ["predict_proba", "decision_function"]
+)
+def test_roc_auc_score_pos_label_grid_search(decision_method):
+    X, y = load_breast_cancer(return_X_y=True)
+    # create a highly imbalanced version of the dataset
+    idx_positive = np.flatnonzero(y == 1)
+    idx_negative = np.flatnonzero(y == 0)
+    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
+    X, y = X[idx_selected], y[idx_selected]
+    X, y = shuffle(X, y, random_state=42)
+    # only use 2 features to make the problem even harder
+    X = X[:, :2]
+    y = np.array(
+        ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
+    )
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, stratify=y, random_state=0,
+    )
+
+    param_grid = {"C": [0.1, 1]}
+    classifier = GridSearchCV(
+        LogisticRegression(), param_grid=param_grid, scoring="roc_auc",
+    )
+    classifier.fit(X_train, y_train)
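
The grid-search test above is the motivating failure: with string labels,
the built-in `roc_auc` scorer receives no `pos_label` and implicitly treats
`classes_[1]` as the positive class. A self-contained sketch of that default
on synthetic data (the names are illustrative):

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X = rng.randn(100, 2)
    y = np.where(X[:, 0] + 0.5 * rng.randn(100) > 0,
                 "not cancer", "cancer").astype(object)

    clf = LogisticRegression().fit(X, y)
    # classes_ is sorted lexicographically, so "cancer" is classes_[0];
    # the scorer's default column, predict_proba(X)[:, 1], is therefore the
    # probability of "not cancer" -- not of the class the test treats as
    # positive.
    print(clf.classes_)  # ['cancer' 'not cancer']
    proba_default = clf.predict_proba(X)[:, 1]
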
From 28cb7c80aaeb4f58322dc63960ea5cca308ba454 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 11:14:17 +0200
Subject: [PATCH 08/17] iter

---
 sklearn/metrics/_scorer.py                  | 23 ++++++-
 sklearn/metrics/tests/test_score_objects.py | 72 ++++++++++++++++++++-
 2 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index b824b9b0cbcb8..8dc8fb4b8293a 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -239,7 +239,13 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         y_pred = method_caller(clf, "predict_proba", X)
         if y_type == "binary":
             if y_pred.shape[1] == 2:
-                y_pred = y_pred[:, 1]
+                if "pos_label" in self._kwargs:
+                    col_idx = np.flatnonzero(
+                        clf.classes_ == self._kwargs["pos_label"]
+                    )[0]
+                else:
+                    col_idx = 1
+                y_pred = y_pred[:, col_idx]
             elif y_pred.shape[1] == 1:  # not multiclass
                 raise ValueError('got predict_proba of shape {},'
                                  ' but need classifier with two'
@@ -296,6 +302,13 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
             y_pred = method_caller(clf, "predict", X)
         else:
             try:
+                if (
+                    y_type == "binary"
+                    and self._score_func.__name__ == "roc_auc_score"
+                    and "pos_label" not in self._kwargs
+                ):
+                    self._kwargs["pos_label"] = clf.classes_[1]
+
                 y_pred = method_caller(clf, "decision_function", X)
 
                 # For multi-output multi-class estimator
@@ -307,7 +320,13 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
 
                 if y_type == "binary":
                     if y_pred.shape[1] == 2:
-                        y_pred = y_pred[:, 1]
+                        if "pos_label" in self._kwargs:
+                            col_idx = np.flatnonzero(
+                                clf.classes_ == self._kwargs["pos_label"]
+                            )[0]
+                        else:
+                            col_idx = 1
+                        y_pred = y_pred[:, col_idx]
                     else:
                         raise ValueError('got predict_proba of shape {},'
                                          ' but need classifier with two'
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 67900b7cb77c3..0f2c840f8467c 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -3,6 +3,7 @@
 import shutil
 import os
 import numbers
+from copy import deepcopy
 from unittest.mock import Mock
 from functools import partial
 
@@ -11,6 +12,7 @@
 import joblib
 from numpy.testing import assert_allclose
 
+from sklearn.utils import shuffle
 from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import ignore_warnings
@@ -32,10 +34,12 @@
 from sklearn.cluster import KMeans
 from sklearn.linear_model import Ridge, LogisticRegression, Perceptron
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from sklearn.datasets import load_diabetes
+from sklearn.datasets import load_breast_cancer
 from sklearn.datasets import make_blobs
-from sklearn.datasets import make_classification, make_regression
+from sklearn.datasets import make_classification
 from sklearn.datasets import make_multilabel_classification
-from sklearn.datasets import load_diabetes
+from sklearn.datasets import make_regression
 from sklearn.model_selection import train_test_split, cross_val_score
 from sklearn.model_selection import GridSearchCV
 from sklearn.multiclass import OneVsRestClassifier
@@ -747,3 +751,67 @@ def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
     msg = "'Perceptron' object has no attribute 'predict_proba'"
     with pytest.raises(AttributeError, match=msg):
         scorer(lr, X, y)
+
+
+@pytest.mark.parametrize(
+    "scoring, is_symmetric",
+    [
+        ("roc_auc", True),
+        ("jaccard", False),
+        ("f1", False),
+        ("average_precision", False),
+        ("precision", False),
+        ("recall", False),
+        ("neg_brier_score", True),
+    ],
+)
+def test_scorer_pos_label_grid_search(scoring, is_symmetric):
+    # check the behaviour of scorers that require a `pos_label` with a
+    # binary target
+    X, y = load_breast_cancer(return_X_y=True)
+    # create a highly imbalanced version of the dataset
+    idx_positive = np.flatnonzero(y == 1)
+    idx_negative = np.flatnonzero(y == 0)
+    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
+    X, y = X[idx_selected], y[idx_selected]
+    X, y = shuffle(X, y, random_state=42)
+    # only use 2 features to make the problem even harder
+    X = X[:, :2]
+    y = np.array(
+        ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
+    )
+
+    param_grid = {"max_depth": [1, 2, 3, 4, 5]}
+    classifier = GridSearchCV(
+        DecisionTreeClassifier(), param_grid=param_grid, scoring=scoring,
+    )
+
+    if is_symmetric:
+        # we will expand to compute for several scorers with different pos_label
+        # which should all give the same results
+        scorer = get_scorer(scoring)
+        scorer_pos_label, scorer_neg_label = deepcopy(scorer), deepcopy(scorer)
+
+        scorer_pos_label._kwargs["pos_label"] = "cancer"
+        scorer_neg_label._kwargs["pos_label"] = "not cancer"
+        multi_scoring = {
+            "scorer_str": scorer,
+            "scorer_pos": scorer_pos_label,
+            "scorer_neg": scorer_neg_label,
+        }
+
+        classifier.set_params(
+            scoring=multi_scoring, refit="scorer_str",
+        )
+        classifier.fit(X, y)
+        assert_allclose(
+            classifier.cv_results_["mean_test_scorer_str"],
+            classifier.cv_results_["mean_test_scorer_pos"]
+        )
+        assert_allclose(
+            classifier.cv_results_["mean_test_scorer_str"],
+            classifier.cv_results_["mean_test_scorer_neg"]
+        )
+    else:
+        with pytest.raises(ValueError):
+            classifier.fit(X, y)
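
The heart of the scorer change is the column lookup. The same logic in
isolation, with a hypothetical `classes_` array:

    import numpy as np

    classes = np.array(["cancer", "not cancer"], dtype=object)
    pos_label = "cancer"

    # Index of the predict_proba / decision_function column matching the
    # requested positive class, instead of the hard-coded column 1.
    col_idx = np.flatnonzero(classes == pos_label)[0]
    print(col_idx)  # 0
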
From 419c3005e5102e1f779cfac2749bad46b5b7135f Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 11:47:21 +0200
Subject: [PATCH 09/17] iter

---
 sklearn/metrics/tests/test_ranking.py | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index fba93036e3bd3..211bb5e5f4bdf 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -1511,30 +1511,3 @@ def test_roc_auc_score_pos_label(decision_method):
     roc_auc = roc_auc_score(y_test, y_pred, pos_label=pos_label)
 
     assert roc_auc == pytest.approx(np.trapz(tpr, fpr))
-
-
-@pytest.mark.parametrize(
-    "decision_method", ["predict_proba", "decision_function"]
-)
-def test_roc_auc_score_pos_label_grid_search(decision_method):
-    X, y = load_breast_cancer(return_X_y=True)
-    # create a highly imbalanced version of the dataset
-    idx_positive = np.flatnonzero(y == 1)
-    idx_negative = np.flatnonzero(y == 0)
-    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
-    X, y = X[idx_selected], y[idx_selected]
-    X, y = shuffle(X, y, random_state=42)
-    # only use 2 features to make the problem even harder
-    X = X[:, :2]
-    y = np.array(
-        ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
-    )
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, stratify=y, random_state=0,
-    )
-
-    param_grid = {"C": [0.1, 1]}
-    classifier = GridSearchCV(
-        LogisticRegression(), param_grid=param_grid, scoring="roc_auc",
-    )
-    classifier.fit(X_train, y_train)

From 67c7a3ebb510a18d15772e224964eeb1cee78504 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 11:49:06 +0200
Subject: [PATCH 10/17] PEP8

---
 sklearn/metrics/tests/test_ranking.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index 211bb5e5f4bdf..dd771570e8481 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -11,7 +11,6 @@
 from sklearn.datasets import load_breast_cancer
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
-from sklearn.model_selection import GridSearchCV
 from sklearn.random_projection import _sparse_random_matrix
 from sklearn.utils import shuffle
 from sklearn.utils.extmath import softmax

From 8abad1c9884f1f14b3acf9e7eb20f3bddb320a05 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 11:51:59 +0200
Subject: [PATCH 11/17] add link to issue

---
 sklearn/metrics/tests/test_score_objects.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 0f2c840f8467c..70e189e9033ba 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -766,8 +766,9 @@ def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
     ],
 )
 def test_scorer_pos_label_grid_search(scoring, is_symmetric):
-    # check the behaviour of scorers that require a `pos_label` with a
-    # binary target
+    # Check the behaviour of scorers that require a `pos_label` with a
+    # binary target. Non-regression test for:
+    # https://github.com/scikit-learn/scikit-learn/pull/17572
     X, y = load_breast_cancer(return_X_y=True)
     # create a highly imbalanced version of the dataset
     idx_positive = np.flatnonzero(y == 1)
     idx_negative = np.flatnonzero(y == 0)

From 113034f19e1a9667d8465d0c8f0c1b6e863838ce Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 12:10:51 +0200
Subject: [PATCH 12/17] TST make sure that pos_label is computing the right thing

---
 sklearn/metrics/tests/test_score_objects.py | 31 +++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 70e189e9033ba..6f5d46122ff43 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -782,9 +782,12 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
         ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
     )
 
-    param_grid = {"max_depth": [1, 2, 3, 4, 5]}
+    param_grid = {"max_depth": [1, 3, 5]}
     classifier = GridSearchCV(
-        DecisionTreeClassifier(), param_grid=param_grid, scoring=scoring,
+        DecisionTreeClassifier(random_state=0),
+        param_grid=param_grid,
+        scoring=scoring,
+        cv=2,
     )
 
     if is_symmetric:
@@ -815,4 +818,28 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
         )
     else:
         with pytest.raises(ValueError):
+            # it should raise an error by default
             classifier.fit(X, y)
+
+        # passing pos_label should solve the issue and should be equivalent to
+        # encoding the label with 0, 1
+        # we should control our cv indices since y will be different, leading
+        # to a different cv split
+        indices = np.arange(y.shape[0])
+        cv = [
+            (indices[: indices.size // 2], indices[indices.size // 2 :]),
+            (indices[indices.size // 2 :], indices[: indices.size // 2]),
+        ]
+        classifier.set_params(cv=cv)
+
+        y_encoded = y == "cancer"
+        classifier.fit(X, y_encoded)
+        mean_test_score_y_encoded = classifier.cv_results_["mean_test_score"]
+
+        scorer = get_scorer(scoring)
+        scorer._kwargs["pos_label"] = "cancer"
+        classifier.set_params(scoring=scorer)
+        classifier.fit(X, y)
+        mean_test_score_pos_label = classifier.cv_results_["mean_test_score"]
+
+        assert_allclose(mean_test_score_pos_label, mean_test_score_y_encoded)
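
Because the new assertions compare runs fitted on differently encoded
targets, the test pins the CV folds by hand instead of relying on the
splitter. The same construction in isolation (the sample count is
illustrative):

    import numpy as np

    # Two mirrored folds defined purely by position, so the split is
    # identical whether y holds strings or the {0, 1} encoding.
    indices = np.arange(50)
    half = indices.size // 2
    cv = [(indices[:half], indices[half:]),
          (indices[half:], indices[:half])]
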
From ebbbd84a13680ee001ad612e8b90a3c0a44ddda2 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 12:34:24 +0200
Subject: [PATCH 13/17] iter

---
 sklearn/metrics/tests/test_score_objects.py | 45 +++++++++++----------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 6f5d46122ff43..57dbee0eb08ce 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -820,26 +820,27 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
         with pytest.raises(ValueError):
             # it should raise an error by default
             classifier.fit(X, y)
 
-        # passing pos_label should solve the issue and should be equivalent to
-        # encoding the label with 0, 1
-        # we should control our cv indices since y will be different, leading
-        # to a different cv split
-        indices = np.arange(y.shape[0])
-        cv = [
-            (indices[: indices.size // 2], indices[indices.size // 2 :]),
-            (indices[indices.size // 2 :], indices[: indices.size // 2]),
-        ]
-        classifier.set_params(cv=cv)
-
-        y_encoded = y == "cancer"
-        classifier.fit(X, y_encoded)
-        mean_test_score_y_encoded = classifier.cv_results_["mean_test_score"]
-
-        scorer = get_scorer(scoring)
-        scorer._kwargs["pos_label"] = "cancer"
-        classifier.set_params(scoring=scorer)
-        classifier.fit(X, y)
-        mean_test_score_pos_label = classifier.cv_results_["mean_test_score"]
-
-        assert_allclose(mean_test_score_pos_label, mean_test_score_y_encoded)
+    # passing pos_label should always solve the issue and should be equivalent
+    # to encoding the label with {0, 1}.
+
+    # we should control our cv indices since y will be different, leading
+    # to a different cv split
+    indices = np.arange(y.shape[0])
+    cv = [
+        (indices[: indices.size // 2], indices[indices.size // 2 :]),
+        (indices[indices.size // 2 :], indices[: indices.size // 2]),
+    ]
+    classifier.set_params(cv=cv, scoring=scoring, refit=True)
+
+    y_encoded = (y == "cancer").astype(int)
+    classifier.fit(X, y_encoded)
+    mean_test_score_y_encoded = classifier.cv_results_["mean_test_score"]
+
+    scorer = get_scorer(scoring)
+    scorer._kwargs["pos_label"] = "cancer"
+    classifier.set_params(scoring=scorer)
+    classifier.fit(X, y)
+    mean_test_score_pos_label = classifier.cv_results_["mean_test_score"]
+
+    assert_allclose(mean_test_score_pos_label, mean_test_score_y_encoded)

From 89eff6680ac97c418c9a61ec5d47cdcc3ad5bde1 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 12:58:49 +0200
Subject: [PATCH 14/17] PEP8

---
 sklearn/metrics/tests/test_score_objects.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 57dbee0eb08ce..f3a63a46363ed 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -793,7 +793,7 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
     if is_symmetric:
         # we will expand to compute for several scorers with different pos_label
        # which should all give the same results
-        scorer = get_scorer(scoring)
+        scorer = deepcopy(get_scorer(scoring))
         scorer_pos_label, scorer_neg_label = deepcopy(scorer), deepcopy(scorer)
 
         scorer_pos_label._kwargs["pos_label"] = "cancer"
@@ -828,8 +828,8 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
     # to a different cv split
     indices = np.arange(y.shape[0])
     cv = [
-        (indices[: indices.size // 2], indices[indices.size // 2 :]),
-        (indices[indices.size // 2 :], indices[: indices.size // 2]),
+        (indices[:indices.size // 2], indices[indices.size // 2:]),
+        (indices[indices.size // 2:], indices[:indices.size // 2]),
     ]
     classifier.set_params(cv=cv, scoring=scoring, refit=True)
 
@@ -837,7 +837,7 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
     classifier.fit(X, y_encoded)
     mean_test_score_y_encoded = classifier.cv_results_["mean_test_score"]
 
-    scorer = get_scorer(scoring)
+    scorer = deepcopy(get_scorer(scoring))
     scorer._kwargs["pos_label"] = "cancer"
     classifier.set_params(scoring=scorer)
     classifier.fit(X, y)

From 4ebeb8796b5cc181912a54cc64f38110b328dbf7 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Fri, 17 Jul 2020 13:54:49 +0200
Subject: [PATCH 15/17] need to make a deepcopy

---
 sklearn/metrics/_scorer.py                  | 5 +++--
 sklearn/metrics/tests/test_score_objects.py | 7 +++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 8dc8fb4b8293a..d07eb31596e2b 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -18,9 +18,10 @@
 # Arnaud Joly
 # License: Simplified BSD
 
+from collections import Counter
 from collections.abc import Iterable
+from copy import deepcopy
 from functools import partial
-from collections import Counter
 
 import numpy as np
 
@@ -364,7 +365,7 @@ def get_scorer(scoring):
     """
     if isinstance(scoring, str):
         try:
-            scorer = SCORERS[scoring]
+            scorer = deepcopy(SCORERS[scoring])
         except KeyError:
             raise ValueError('%r is not a valid scoring value. '
                              'Use sorted(sklearn.metrics.SCORERS.keys()) '
                              'to get valid options.' % scoring)
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index f3a63a46363ed..48b01e638ddc0 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -793,13 +793,12 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
     if is_symmetric:
         # we will expand to compute for several scorers with different pos_label
         # which should all give the same results
-        scorer = deepcopy(get_scorer(scoring))
+        scorer = get_scorer(scoring)
         scorer_pos_label, scorer_neg_label = deepcopy(scorer), deepcopy(scorer)
-
         scorer_pos_label._kwargs["pos_label"] = "cancer"
         scorer_neg_label._kwargs["pos_label"] = "not cancer"
         multi_scoring = {
-            "scorer_str": scorer,
+            "scorer_str": scoring,
             "scorer_pos": scorer_pos_label,
             "scorer_neg": scorer_neg_label,
         }
@@ -837,7 +836,7 @@ def test_scorer_pos_label_grid_search(scoring, is_symmetric):
     classifier.fit(X, y_encoded)
     mean_test_score_y_encoded = classifier.cv_results_["mean_test_score"]
 
-    scorer = deepcopy(get_scorer(scoring))
+    scorer = get_scorer(scoring)
     scorer._kwargs["pos_label"] = "cancer"
     classifier.set_params(scoring=scorer)
     classifier.fit(X, y)
"binary": if y_pred.shape[1] == 2: + if ( + self._score_func.__name__ == "roc_auc_score" + and "pos_label" not in self._kwargs + ): + self._kwargs["pos_label"] = clf.classes_[1] + if "pos_label" in self._kwargs: col_idx = np.flatnonzero( clf.classes_ == self._kwargs["pos_label"] @@ -371,7 +370,7 @@ def get_scorer(scoring): 'Use sorted(sklearn.metrics.SCORERS.keys()) ' 'to get valid options.' % scoring) else: - scorer = scoring + scorer = deepcopy(scoring) return scorer From 33dfd93e674f554a610ed52e704910213fb2f91b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 17 Jul 2020 14:23:50 +0200 Subject: [PATCH 17/17] iter --- sklearn/metrics/_scorer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index e91522f10b329..b40ad6c23db69 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -364,14 +364,14 @@ def get_scorer(scoring): """ if isinstance(scoring, str): try: - scorer = deepcopy(SCORERS[scoring]) + scorer = SCORERS[scoring] except KeyError: raise ValueError('%r is not a valid scoring value. ' 'Use sorted(sklearn.metrics.SCORERS.keys()) ' 'to get valid options.' % scoring) else: - scorer = deepcopy(scoring) - return scorer + scorer = scoring + return deepcopy(scorer) def _passthrough_scorer(estimator, *args, **kwargs):