
BUG Fixes error with multiclass roc auc scorer #15274


Merged: 10 commits, Nov 2, 2019
8 changes: 5 additions & 3 deletions doc/whats_new/v0.22.rst
@@ -470,9 +470,11 @@ Changelog
Gain and Normalized Discounted Cumulative Gain. :pr:`9951` by :user:`Jérôme
Dockès <jeromedockes>`.

- |Feature| Added multiclass support to :func:`metrics.roc_auc_score`.
:issue:`12789` by :user:`Kathy Chen <kathyxchen>`,
:user:`Mohamed Maskani <maskani-moh>`, and :user:`Thomas Fan <thomasjpfan>`.
- |Feature| Added multiclass support to :func:`metrics.roc_auc_score` with
corresponding scorers 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted',
and 'roc_auc_ovo_weighted'. :pr:`12789` and :pr:`15274` by
:user:`Kathy Chen <kathyxchen>`, :user:`Mohamed Maskani <maskani-moh>`, and
`Thomas Fan`_.

- |Feature| Add :class:`metrics.mean_tweedie_deviance` measuring the
Tweedie deviance for a given ``power`` parameter. Also add mean Poisson
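A hypothetical usage sketch (not part of the diff): the new scorer strings plug into any scoring parameter, e.g. cross_val_score, and evaluate roc_auc_score on the output of predict_proba.

# Usage sketch, assuming scikit-learn >= 0.22 with this PR merged.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_classes=3, n_informative=3, n_samples=300,
                           random_state=0)
clf = LogisticRegression(multi_class="multinomial", max_iter=1000)

for scoring in ("roc_auc_ovr", "roc_auc_ovo",
                "roc_auc_ovr_weighted", "roc_auc_ovo_weighted"):
    scores = cross_val_score(clf, X, y, scoring=scoring, cv=5)
    print(scoring, scores.mean())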
10 changes: 5 additions & 5 deletions sklearn/metrics/_scorer.py
@@ -247,7 +247,7 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
if y_type == "binary":
if y_pred.shape[1] == 2:
y_pred = y_pred[:, 1]
else:
elif y_pred.shape[1] == 1: # not multiclass
Member:
Hmm, why is this useful?

Member Author:
Looking at the blame, this was added in #12486 to resolve #7598. It looks like it was trying to get a better error message for the y_pred.shape[1] == 1 case.

raise ValueError('got predict_proba of shape {},'
' but need classifier with two'
' classes for {} scoring'.format(
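To make the case discussed in this thread concrete, here is a minimal sketch (an assumed reproduction, not part of the diff) of a single-column predict_proba hitting the ValueError above: a classifier fitted on data that contains only one class.

# Assumed reproduction: a classifier fitted on a single class returns
# predict_proba of shape (n_samples, 1); a predict_proba-based scorer applied
# to binary labels then raises the ValueError above instead of failing
# obscurely inside the metric.
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import get_scorer

rng = np.random.RandomState(0)
X = rng.randn(10, 3)
y_train = np.zeros(10, dtype=int)       # only one class seen during fit
y_test = np.array([0, 1] * 5)           # binary labels at scoring time

clf = DecisionTreeClassifier().fit(X, y_train)
print(clf.predict_proba(X).shape)       # (10, 1)

scorer = get_scorer("neg_log_loss")     # any predict_proba-based scorer
try:
    scorer(clf, X, y_test)
except ValueError as exc:
    print(exc)  # e.g. "got predict_proba of shape (10, 1), but need
                # classifier with two classes for ... scoring"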
@@ -645,14 +645,14 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False,
needs_threshold=True)
average_precision_scorer = make_scorer(average_precision_score,
needs_threshold=True)
roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_threshold=True,
roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True,
multi_class='ovo')
roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
multi_class='ovo',
average='weighted')
roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_threshold=True,
roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True,
multi_class='ovr')
roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_threshold=True,
roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
multi_class='ovr',
average='weighted')

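The switch from needs_threshold=True to needs_proba=True above is the substance of the fix: the threshold-based scorer path does not handle multiclass targets, while the proba-based path passes the full predict_proba matrix to roc_auc_score. A minimal sketch of the contrast (an assumed illustration, not code from this PR):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, roc_auc_score

X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
clf = LogisticRegression(multi_class="multinomial", max_iter=1000).fit(X, y)

# New definition: the metric receives predict_proba(X), which is what the
# multiclass roc_auc_score requires.
proba_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class="ovr")
print(proba_scorer(clf, X, y))

# Old definition: the threshold scorer path rejects multiclass targets, which
# is the error this PR fixes.
threshold_scorer = make_scorer(roc_auc_score, needs_threshold=True,
                               multi_class="ovr")
try:
    threshold_scorer(clf, X, y)
except ValueError as exc:
    print(exc)  # e.g. "multiclass format is not supported"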
51 changes: 50 additions & 1 deletion sklearn/metrics/tests/test_score_objects.py
@@ -4,6 +4,7 @@
import os
import numbers
from unittest.mock import Mock
from functools import partial

import numpy as np
import pytest
@@ -29,7 +30,7 @@
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.linear_model import Ridge, LogisticRegression
from sklearn.linear_model import Ridge, LogisticRegression, Perceptron
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.datasets import make_blobs
from sklearn.datasets import make_classification
@@ -670,3 +671,51 @@ def test_multimetric_scorer_sanity_check():
for key, value in result.items():
score_name = scorers[key]
assert_allclose(value, seperate_scores[score_name])


@pytest.mark.parametrize('scorer_name, metric', [
('roc_auc_ovr', partial(roc_auc_score, multi_class='ovr')),
('roc_auc_ovo', partial(roc_auc_score, multi_class='ovo')),
('roc_auc_ovr_weighted', partial(roc_auc_score, multi_class='ovr',
average='weighted')),
('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo',
average='weighted'))])
def test_multiclass_roc_proba_scorer(scorer_name, metric):
scorer = get_scorer(scorer_name)
X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
random_state=0)
lr = LogisticRegression(multi_class="multinomial").fit(X, y)
y_proba = lr.predict_proba(X)
expected_score = metric(y, y_proba)

assert scorer(lr, X, y) == pytest.approx(expected_score)


def test_multiclass_roc_proba_scorer_label():
scorer = make_scorer(roc_auc_score, multi_class='ovo',
labels=[0, 1, 2], needs_proba=True)
X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
random_state=0)
lr = LogisticRegression(multi_class="multinomial").fit(X, y)
y_proba = lr.predict_proba(X)

y_binary = y == 0
expected_score = roc_auc_score(y_binary, y_proba,
multi_class='ovo',
labels=[0, 1, 2])

assert scorer(lr, X, y_binary) == pytest.approx(expected_score)


@pytest.mark.parametrize('scorer_name', [
'roc_auc_ovr', 'roc_auc_ovo',
'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'])
def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
Member (@qinhanmin2014, Nov 1, 2019):
Could you please tell me why multiclass roc_auc_score does not support the output of decision_function? Thanks.

Member Author:

The paper this was based on used probabilities for ranking: https://link.springer.com/content/pdf/10.1023%2FA%3A1010920819831.pdf

For reference, this was discussed in the original issue: #7663 (comment)

# Perceptron has no predict_proba
scorer = get_scorer(scorer_name)
X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
random_state=0)
lr = Perceptron().fit(X, y)
msg = "'Perceptron' object has no attribute 'predict_proba'"
with pytest.raises(AttributeError, match=msg):
scorer(lr, X, y)
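
Following up on the review thread above: multiclass roc_auc_score validates that the scores are probability estimates, so raw decision_function output is rejected. A minimal sketch under that assumption (not one of the tests added by this PR):

from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score
from sklearn.svm import LinearSVC

X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
svc = LinearSVC(max_iter=10000).fit(X, y)
scores = svc.decision_function(X)   # shape (n_samples, 3), not probabilities

try:
    roc_auc_score(y, scores, multi_class="ovr")
except ValueError as exc:
    print(exc)  # e.g. "Target scores need to be probabilities for multiclass
                # roc_auc, i.e. they should sum up to 1.0 over classes"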