
ENH Add replace_undefined_by to accuracy_score #31187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Status: Open · wants to merge 7 commits into base: main
@@ -0,0 +1,3 @@
- :func:`~metrics.accuracy_score` now has a `replace_undefined_by` param that can be
set to define the function's return value when the metric is undefined.
By :user:`Stefanie Senger <StefanieSenger>`
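
A minimal usage sketch of the proposed parameter (an assumption based on this PR's branch, not on any released scikit-learn version; outputs follow the diff below):

    import warnings

    import numpy as np
    from sklearn.metrics import accuracy_score

    empty = np.array([])
    with warnings.catch_warnings():
        # each call below also emits an UndefinedMetricWarning
        warnings.simplefilter("ignore")
        accuracy_score(empty, empty)                                # 0.0 (default)
        accuracy_score(empty, empty, replace_undefined_by=np.nan)   # nan
        accuracy_score(empty, empty, replace_undefined_by=0.5)      # 0.5
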
53 changes: 40 additions & 13 deletions sklearn/metrics/_classification.py
@@ -10,6 +10,7 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

import math
import warnings
from numbers import Integral, Real

@@ -291,10 +292,13 @@ def _validate_multiclass_probabilistic_prediction(
"y_pred": ["array-like", "sparse matrix"],
"normalize": ["boolean"],
"sample_weight": ["array-like", None],
"replace_undefined_by": [Interval(Real, 0.0, 1.0, closed="both"), np.nan],
},
prefer_skip_nested_validation=True,
)
def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
def accuracy_score(
y_true, y_pred, *, normalize=True, sample_weight=None, replace_undefined_by=0.0
):
"""Accuracy classification score.

In multilabel classification, this function computes subset accuracy:
@@ -318,14 +322,26 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
sample_weight : array-like of shape (n_samples,), default=None
Sample weights.

replace_undefined_by : np.nan, int 0, float in `[0.0, 1.0]`, default=0.0
Sets the return value when `y_true` and `y_pred` are empty and the metric is
thus ill-defined. Can take the following values:

- `np.nan` to return `np.nan`
- a floating point value in the range of `[0.0, 1.0]`

Note that with `normalize=False` only `np.nan` or `0.0` can be returned
regardless of the value set, since `0.0 ≤ accuracy_score ≤ n_samples` and
the number of samples is zero when `y_true` and `y_pred` are empty.

.. versionadded:: 1.7

Returns
-------
score : float or int
If ``normalize == True``, return the fraction of correctly
classified samples (float), else returns the number of correctly
classified samples (int).
score : float
If ``normalize == True``, returns the fraction of correctly classified samples,
else returns the number of correctly classified samples.

The best performance is 1 with ``normalize == True`` and the number
The best performance is 1.0 with ``normalize == True`` and the number
of samples with ``normalize == False``.

See Also
@@ -360,6 +376,18 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
check_consistent_length(y_true, y_pred, sample_weight)

if _num_samples(y_true) == 0:
msg = (
"`y_true` and `y_pred` are empty. `accuracy_score` is undefined and "
"set to the value defined in the `replace_undefined_by` param, which "
"defaults to 0.0."
)
warnings.warn(msg, UndefinedMetricWarning, stacklevel=2)
if normalize:
return replace_undefined_by
else:
return replace_undefined_by if math.isnan(replace_undefined_by) else 0.0

if y_type.startswith("multilabel"):
differing_labels = _count_nonzero(y_true - y_pred, xp=xp, device=device, axis=1)
score = xp.asarray(differing_labels == 0, device=device)
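
A small sketch of the `normalize=False` branch added above (again assuming this PR's branch): whatever value is requested, only `np.nan` or `0.0` can come back, since the count of correct samples is bounded by the (zero) number of samples. Both calls also emit an UndefinedMetricWarning:

    import numpy as np
    from sklearn.metrics import accuracy_score

    empty = np.array([])
    accuracy_score(empty, empty, normalize=False, replace_undefined_by=0.5)     # 0.0, not 0.5
    accuracy_score(empty, empty, normalize=False, replace_undefined_by=np.nan)  # nan
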
@@ -1210,9 +1238,8 @@ def matthews_corrcoef(y_true, y_pred, *, sample_weight=None):
def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None):
"""Zero-one classification loss.

If normalize is ``True``, return the fraction of misclassifications
(float), else it returns the number of misclassifications (int). The best
performance is 0.
If normalize is ``True``, returns the fraction of misclassifications, else returns
the number of misclassifications. The best performance is 0.

Read more in the :ref:`User Guide <zero_one_loss>`.

Expand All @@ -1233,9 +1260,9 @@ def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None):

Returns
-------
loss : float or int,
If ``normalize == True``, return the fraction of misclassifications
(float), else it returns the number of misclassifications (int).
loss : float,
If ``normalize == True``, returns the fraction of misclassifications, else
returns the number of misclassifications.

See Also
--------
@@ -3081,7 +3108,7 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None):

Returns
-------
loss : float or int
loss : float
Member:

The return types are changed here. I agree it should always be float for all of them, but it'd be nice to have a test for all the cases to make sure it's actually float.

@StefanieSenger (Contributor Author), Apr 29, 2025:

I will do this in a separate PR.

These tests had already been added in #30575.
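
Not part of this diff, but roughly the kind of float-return check the comment above asks for (a sketch under the assumption that #30575 already switched these metrics to plain float returns):

    import numpy as np
    from sklearn.metrics import hamming_loss, zero_one_loss

    y_true = np.array([0, 1, 1, 0])
    y_pred = np.array([0, 1, 0, 0])

    # each call should return a float, never an int, per the docstring changes above
    assert isinstance(zero_one_loss(y_true, y_pred), float)
    assert isinstance(zero_one_loss(y_true, y_pred, normalize=False), float)
    assert isinstance(hamming_loss(y_true, y_pred), float)
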

Return the average Hamming loss between elements of ``y_true`` and
``y_pred``.

37 changes: 37 additions & 0 deletions sklearn/metrics/tests/test_classification.py
@@ -1,3 +1,4 @@
import math
import re
import warnings
from functools import partial
@@ -251,6 +252,42 @@ def test_multilabel_accuracy_score_subset_accuracy():
assert accuracy_score(y2, np.zeros(y1.shape)) == 0


@pytest.mark.parametrize("replace_undefined_by", [0.0, 0.5, np.nan])
def test_accuracy_score_undefined(replace_undefined_by):
"""Test that accuracy_score returns the argument set in the `replace_undefined_by`
param when the metric is undefined."""

def check_equal(res, exp):
if np.isnan(res) and np.isnan(exp):
return True
return res == exp

y_true = y_pred = np.array([])

acc = accuracy_score(y_true, y_pred, replace_undefined_by=replace_undefined_by)
assert check_equal(acc, replace_undefined_by)

acc = accuracy_score(
y_true, y_pred, normalize=False, replace_undefined_by=replace_undefined_by
)
if math.isnan(replace_undefined_by):
assert check_equal(acc, np.nan)
else:
# can only return 0 here, to stay within the range of values the metric
# could take if it were defined:
assert acc == 0


def test_accuracy_score_undefined_raises_warning():
"""Test that accuracy_score raises UndefinedMetricWarning when y_true and y_pred are
empty."""
with pytest.warns(
UndefinedMetricWarning,
match="`y_true` and `y_pred` are empty. `accuracy_score` is undefined",
):
accuracy_score(np.array([]), np.array([]))


def test_precision_recall_f1_score_binary():
# Test Precision Recall and F1 Score for binary classification task
y_true, y_pred, _ = make_prediction(binary=True)