From 5a000a04278b20ab70815ab955f31fb2c4a79ef6 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 10 Apr 2025 16:07:58 +0200 Subject: [PATCH 01/14] ENH Add zero division handling to cohen_kappa_score --- sklearn/metrics/_classification.py | 36 ++++++++++-- sklearn/metrics/tests/test_classification.py | 60 ++++++++++++++++++++ 2 files changed, 91 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 30dd53bc16109..2dd591443aa90 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -799,10 +799,13 @@ def multilabel_confusion_matrix( "labels": ["array-like", None], "weights": [StrOptions({"linear", "quadratic"}), None], "sample_weight": ["array-like", None], + "replace_undefined_by": [Interval(Real, -1.0, 1.0, closed="both"), np.nan], }, prefer_skip_nested_validation=True, ) -def cohen_kappa_score(y1, y2, *, labels=None, weights=None, sample_weight=None): +def cohen_kappa_score( + y1, y2, *, labels=None, weights=None, sample_weight=None, replace_undefined_by=0.0 +): r"""Compute Cohen's kappa: a statistic that measures inter-annotator agreement. This function computes Cohen's kappa [1]_, a score that expresses the level @@ -841,11 +844,24 @@ class labels [2]_. sample_weight : array-like of shape (n_samples,), default=None Sample weights. + replace_undefined_by : np.nan, float in [-1.0, 1.0], default=0.0 + Sets the return value when a division by zero would occur. This can happen for + instance on empty input arrays, or when no label of interest (as defined in the + `labels` param) is assigned by the second annotator, or when both `y1` and `y2` + only have one label in common that is also in `labels`. Can take the + following values: + + - `np.nan` to return `np.nan` + - a floating point value in the range of [-1.0, 1.0] to return a specific value + + .. versionadded:: 1.8 + Returns ------- kappa : float - The kappa statistic, which is a number between -1 and 1. The maximum - value means complete agreement; zero or lower means chance agreement. + The kappa statistic, which is a number between -1.0 and 1.0. The maximum value + means complete agreement; the minimum value means complete disagreement; 0.0 + indicates no agreement beyond what would be expected by chance. References ---------- @@ -870,7 +886,12 @@ class labels [2]_. n_classes = confusion.shape[0] sum0 = np.sum(confusion, axis=0) sum1 = np.sum(confusion, axis=1) - expected = np.outer(sum0, sum1) / np.sum(sum0) + + numerator = np.outer(sum0, sum1) + denominator = np.sum(sum0) + if np.isclose(denominator, 0): + return replace_undefined_by + expected = numerator / denominator if weights is None: w_mat = np.ones([n_classes, n_classes], dtype=int) @@ -883,7 +904,12 @@ class labels [2]_. else: w_mat = (w_mat - w_mat.T) ** 2 - k = np.sum(w_mat * confusion) / np.sum(w_mat * expected) + numerator = np.sum(w_mat * confusion) + denominator = np.sum(w_mat * expected) + if np.isclose(denominator, 0): + return replace_undefined_by + k = numerator / denominator + return float(1 - k) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 13fe8b3deb88e..0f102d27356c4 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -926,6 +926,66 @@ def test_cohen_kappa(): ) +@pytest.mark.parametrize("replace_undefined_by", [0.0, np.nan]) +def test_cohen_kappa_zero_division(replace_undefined_by): + """Test that cohen_kappa_score handles divisions by 0 correctly returning the + `replace_undefined_by` param.""" + + def check_equal(res, exp): + if np.isnan(res) and np.isnan(exp): + return True + return res == exp + + # test case: empty inputs + y1 = np.array([]) + y2 = np.array([]) + assert check_equal( + cohen_kappa_score(y1, y2, replace_undefined_by=replace_undefined_by), + replace_undefined_by, + ) + + # test case: annotator y2 does not assign any label specified in `labels` + labels = [1, 2] + y1 = np.array([1] * 5 + [2] * 5) + y2 = np.array([3] * 10) + assert check_equal( + cohen_kappa_score( + y1, y2, labels=labels, replace_undefined_by=replace_undefined_by + ), + replace_undefined_by, + ) + + # test case: both inputs only have one label + y1 = np.array([3] * 10) + y2 = np.array([3] * 10) + assert check_equal( + cohen_kappa_score(y1, y2, replace_undefined_by=replace_undefined_by), + replace_undefined_by, + ) + + # test case: both inputs only have one label in common with `labels` + labels = [1] + y1 = np.array([1] * 5 + [2] * 5) + y2 = np.array([1] * 5 + [3] * 5) + assert check_equal( + cohen_kappa_score( + y1, y2, labels=labels, replace_undefined_by=replace_undefined_by + ), + replace_undefined_by, + ) + # with weights="quadratic" it is almost the same test: skipped here + assert check_equal( + cohen_kappa_score( + y1, + y2, + labels=labels, + weights="linear", + replace_undefined_by=replace_undefined_by, + ), + replace_undefined_by, + ) + + @pytest.mark.parametrize("zero_division", [0, 1, np.nan]) @pytest.mark.parametrize("y_true, y_pred", [([0], [0])]) @pytest.mark.parametrize( From 02fd573b711c247df9c55644d38a5e206f936cd4 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 10 Apr 2025 16:23:16 +0200 Subject: [PATCH 02/14] add changelog --- .../upcoming_changes/sklearn.metrics/31172.enhancement.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst new file mode 100644 index 0000000000000..8caa3169d63d6 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst @@ -0,0 +1,3 @@ +- :func:`~metrics.cohen_kappa_score` now has a `replace_undefined_by` param, that can be + set to define the function's behaviour when there would be a division by zero. + By :user:`Stefanie Senger ` From 2d84ded7c9b4b0ecc047748be457fa2b1363ca99 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 10 Apr 2025 17:06:36 +0200 Subject: [PATCH 03/14] add warnings raised in case of zero division --- sklearn/metrics/_classification.py | 15 +++++++++- sklearn/metrics/tests/test_classification.py | 29 ++++++++++++++++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2dd591443aa90..092606ac6aef8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -848,7 +848,8 @@ class labels [2]_. Sets the return value when a division by zero would occur. This can happen for instance on empty input arrays, or when no label of interest (as defined in the `labels` param) is assigned by the second annotator, or when both `y1` and `y2` - only have one label in common that is also in `labels`. Can take the + only have one label in common that is also in `labels`. In these cases, an + :class:`~sklearn.exceptions.UndefinedMetricWarning` is raised. Can take the following values: - `np.nan` to return `np.nan` @@ -890,6 +891,12 @@ class labels [2]_. numerator = np.outer(sum0, sum1) denominator = np.sum(sum0) if np.isclose(denominator, 0): + msg = ( + "`y2` does not contain any label that is also both present in `y1` and in " + "`labels`. cohen_kappa_score is undefined and set to the value defined in " + "the `replace_undefined_by` param, which defaults to 0.0." + ) + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return replace_undefined_by expected = numerator / denominator @@ -907,6 +914,12 @@ class labels [2]_. numerator = np.sum(w_mat * confusion) denominator = np.sum(w_mat * expected) if np.isclose(denominator, 0): + msg = ( + "`y1` and `y2` only have one label in common that is also in `labels`. " + "cohen_kappa_score is undefined and set to the value defined in the " + "`replace_undefined_by` param, which defaults to 0.0." + ) + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return replace_undefined_by k = numerator / denominator diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0f102d27356c4..82a67ba0a6eea 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -928,7 +928,7 @@ def test_cohen_kappa(): @pytest.mark.parametrize("replace_undefined_by", [0.0, np.nan]) def test_cohen_kappa_zero_division(replace_undefined_by): - """Test that cohen_kappa_score handles divisions by 0 correctly returning the + """Test that cohen_kappa_score handles divisions by 0 correctly by returning the `replace_undefined_by` param.""" def check_equal(res, exp): @@ -964,7 +964,7 @@ def check_equal(res, exp): ) # test case: both inputs only have one label in common with `labels` - labels = [1] + labels = [1, 2] y1 = np.array([1] * 5 + [2] * 5) y2 = np.array([1] * 5 + [3] * 5) assert check_equal( @@ -986,6 +986,31 @@ def check_equal(res, exp): ) +def test_cohen_kappa_zero_division_warning(): + """Test that cohen_kappa_score raises UndefinedMetricWarning when a division by 0 + occurs.""" + + # test first place to raise warning + labels = [1, 2] + y1 = np.array([1] * 5 + [2] * 5) + y2 = np.array([3] * 10) + with pytest.warns( + UndefinedMetricWarning, + match="`y2` does not contain any label that is also both present in", + ): + cohen_kappa_score(y1, y2, labels=labels) + + # test second place to raise warning + labels = [1, 2] + y1 = np.array([1] * 5 + [2] * 5) + y2 = np.array([1] * 5 + [3] * 5) + with pytest.warns( + UndefinedMetricWarning, + match="`y1` and `y2` only have one label in common that is also in `labels`.", + ): + cohen_kappa_score(y1, y2, labels=labels) + + @pytest.mark.parametrize("zero_division", [0, 1, np.nan]) @pytest.mark.parametrize("y_true, y_pred", [([0], [0])]) @pytest.mark.parametrize( From 4b00d9fbdcdd5656098dc396bdcf2b642e9812d4 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 10 Apr 2025 17:45:14 +0200 Subject: [PATCH 04/14] refine test comments --- sklearn/metrics/tests/test_classification.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 82a67ba0a6eea..cacbcd478259c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -944,7 +944,9 @@ def check_equal(res, exp): replace_undefined_by, ) - # test case: annotator y2 does not assign any label specified in `labels` + # test case: annotator y2 does not assign any label specified in `labels` (note: + # also applicable if labels is default and y2 does not contain any label that is in + # y1) labels = [1, 2] y1 = np.array([1] * 5 + [2] * 5) y2 = np.array([3] * 10) @@ -963,7 +965,9 @@ def check_equal(res, exp): replace_undefined_by, ) - # test case: both inputs only have one label in common with `labels` + # test case: both inputs only have one label in common that is also in `labels` + # (note: weights="linear" and weights="quadratic" are different branches, though the + # latter is so similar to the former that the test is skipped here) labels = [1, 2] y1 = np.array([1] * 5 + [2] * 5) y2 = np.array([1] * 5 + [3] * 5) @@ -973,7 +977,6 @@ def check_equal(res, exp): ), replace_undefined_by, ) - # with weights="quadratic" it is almost the same test: skipped here assert check_equal( cohen_kappa_score( y1, From f58492a6bebeb366b49489c1db99a962d7fe1274 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Thu, 10 Apr 2025 23:02:37 +0200 Subject: [PATCH 05/14] correct version --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 092606ac6aef8..f2c30794cd5f4 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -855,7 +855,7 @@ class labels [2]_. - `np.nan` to return `np.nan` - a floating point value in the range of [-1.0, 1.0] to return a specific value - .. versionadded:: 1.8 + .. versionadded:: 1.7 Returns ------- From 245da3e77dbf315b1c360063c9c60747415fe50f Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 11 Apr 2025 13:11:27 +0200 Subject: [PATCH 06/14] improve docstring of test --- sklearn/metrics/tests/test_classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index cacbcd478259c..8ef9305d1c12c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -929,7 +929,9 @@ def test_cohen_kappa(): @pytest.mark.parametrize("replace_undefined_by", [0.0, np.nan]) def test_cohen_kappa_zero_division(replace_undefined_by): """Test that cohen_kappa_score handles divisions by 0 correctly by returning the - `replace_undefined_by` param.""" + `replace_undefined_by` param. (The fist two tests cover the first possible location + in the function for an occurrence of a division by zero, the second two tests in the + the second possible location in the function.""" def check_equal(res, exp): if np.isnan(res) and np.isnan(exp): From ede386e4d91fd6cc880b3594c8381e44f6d60cd0 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Sat, 12 Apr 2025 17:51:48 +0200 Subject: [PATCH 07/14] wording --- .../upcoming_changes/sklearn.metrics/31172.enhancement.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst index 8caa3169d63d6..606f629655465 100644 --- a/doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/31172.enhancement.rst @@ -1,3 +1,3 @@ - :func:`~metrics.cohen_kappa_score` now has a `replace_undefined_by` param, that can be - set to define the function's behaviour when there would be a division by zero. + set to define the function's return value when there would be a division by zero. By :user:`Stefanie Senger ` From b93b44553f10fd5bd1b031f742e0a4a963f36691 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Sat, 12 Apr 2025 18:43:01 +0200 Subject: [PATCH 08/14] add deprecation cycle for default behaviour if zero division --- sklearn/metrics/_classification.py | 31 ++++++++++++++++++-- sklearn/metrics/tests/test_classification.py | 27 +++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index f2c30794cd5f4..f6bbd8a439db0 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -799,12 +799,24 @@ def multilabel_confusion_matrix( "labels": ["array-like", None], "weights": [StrOptions({"linear", "quadratic"}), None], "sample_weight": ["array-like", None], - "replace_undefined_by": [Interval(Real, -1.0, 1.0, closed="both"), np.nan], + "replace_undefined_by": [ + Interval(Real, -1.0, 1.0, closed="both"), + np.nan, + Hidden(StrOptions({"deprecated"})), + ], }, prefer_skip_nested_validation=True, ) +# TODO(1.9): Change default value for `replace_undefined_by` param to 0.0 and remove +# FutureWarnings. def cohen_kappa_score( - y1, y2, *, labels=None, weights=None, sample_weight=None, replace_undefined_by=0.0 + y1, + y2, + *, + labels=None, + weights=None, + sample_weight=None, + replace_undefined_by="deprecated", ): r"""Compute Cohen's kappa: a statistic that measures inter-annotator agreement. @@ -844,7 +856,7 @@ class labels [2]_. sample_weight : array-like of shape (n_samples,), default=None Sample weights. - replace_undefined_by : np.nan, float in [-1.0, 1.0], default=0.0 + replace_undefined_by : np.nan, float in [-1.0, 1.0], default=np.nan Sets the return value when a division by zero would occur. This can happen for instance on empty input arrays, or when no label of interest (as defined in the `labels` param) is assigned by the second annotator, or when both `y1` and `y2` @@ -888,9 +900,19 @@ class labels [2]_. sum0 = np.sum(confusion, axis=0) sum1 = np.sum(confusion, axis=1) + mgs_changing_default = ( + "The default return value of `cohen_kappa_score` in case of a division " + "by zero has been deprecated in 1.7 and will be changed to 0.0 in version " + "1.9. Set `replace_undefined_by=0.0` to use the new default and to silence " + "this Warning." + ) + numerator = np.outer(sum0, sum1) denominator = np.sum(sum0) if np.isclose(denominator, 0): + if replace_undefined_by == "deprecated": + replace_undefined_by = np.nan + warnings.warn(mgs_changing_default, FutureWarning) msg = ( "`y2` does not contain any label that is also both present in `y1` and in " "`labels`. cohen_kappa_score is undefined and set to the value defined in " @@ -914,6 +936,9 @@ class labels [2]_. numerator = np.sum(w_mat * confusion) denominator = np.sum(w_mat * expected) if np.isclose(denominator, 0): + if replace_undefined_by == "deprecated": + replace_undefined_by = np.nan + warnings.warn(mgs_changing_default, FutureWarning) msg = ( "`y1` and `y2` only have one label in common that is also in `labels`. " "cohen_kappa_score is undefined and set to the value defined in the " diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 8ef9305d1c12c..646d8d343ca2c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -991,6 +991,8 @@ def check_equal(res, exp): ) +# TODO(1.9): remove the @ignore_warnings of the FutureWarning +@ignore_warnings(category=FutureWarning) def test_cohen_kappa_zero_division_warning(): """Test that cohen_kappa_score raises UndefinedMetricWarning when a division by 0 occurs.""" @@ -1016,6 +1018,31 @@ def test_cohen_kappa_zero_division_warning(): cohen_kappa_score(y1, y2, labels=labels) +# TODO(1.9): remove test when deprecation cycle is over +def test_cohen_kappa_score_raise_warning_deprecation(): + """Test that `cohen_kappa_score` raises a `FutureWarning` for the changing default + of the `replace_undefined_by` param.""" + # test first place to raise warning + labels = [1, 2] + y1 = np.array([1] * 5 + [2] * 5) + y2 = np.array([3] * 10) + with pytest.warns( + FutureWarning, + match="The default return value of `cohen_kappa_score` in case of a division", + ): + cohen_kappa_score(y1, y2, labels=labels) + + # test second place to raise warning + labels = [1, 2] + y1 = np.array([1] * 5 + [2] * 5) + y2 = np.array([1] * 5 + [3] * 5) + with pytest.warns( + FutureWarning, + match="The default return value of `cohen_kappa_score` in case of a division", + ): + cohen_kappa_score(y1, y2, labels=labels) + + @pytest.mark.parametrize("zero_division", [0, 1, np.nan]) @pytest.mark.parametrize("y_true, y_pred", [([0], [0])]) @pytest.mark.parametrize( From a7f4ba674668cdadad3264c950754c4b52a55587 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Sat, 19 Apr 2025 07:27:26 +0200 Subject: [PATCH 09/14] fix linting --- sklearn/metrics/tests/test_classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 2faa324c73a46..b8a4cf7921644 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1042,6 +1042,7 @@ def test_cohen_kappa_score_raise_warning_deprecation(): ): cohen_kappa_score(y1, y2, labels=labels) + def test_cohen_kappa_score_error_wrong_label(): """Test that correct error is raised when users pass labels that are not in y1.""" labels = [1, 2] From 6d8e59bd16e3fecbaf29c601098b3fd0be6133d2 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:06:24 +0200 Subject: [PATCH 10/14] Apply suggestions from code review Co-authored-by: Virgil Chan --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 919ed010149e5..2c35fc0cf0277 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -927,7 +927,7 @@ class labels [2]_. replace_undefined_by = np.nan warnings.warn(mgs_changing_default, FutureWarning) msg = ( - "`y2` does not contain any label that is also both present in `y1` and in " + "`y2` contains no labels that are presented in both `y1` and " "`labels`. cohen_kappa_score is undefined and set to the value defined in " "the `replace_undefined_by` param, which defaults to 0.0." ) @@ -953,7 +953,7 @@ class labels [2]_. replace_undefined_by = np.nan warnings.warn(mgs_changing_default, FutureWarning) msg = ( - "`y1` and `y2` only have one label in common that is also in `labels`. " + "`y1`, `y2` and `labels` have only one label in common. " "cohen_kappa_score is undefined and set to the value defined in the " "`replace_undefined_by` param, which defaults to 0.0." ) From 973b219342a2a15b1a7cc536e331a93d804d79fb Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 30 Apr 2025 19:48:50 +0200 Subject: [PATCH 11/14] clean up test and correct warning message --- sklearn/cluster/_agglomerative.py | 2 +- sklearn/metrics/_classification.py | 10 +++++----- sklearn/metrics/tests/test_classification.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index a2365da3669c4..f068dc934151d 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -36,7 +36,7 @@ from ..utils.validation import check_memory, validate_data # mypy error: Module 'sklearn.cluster' has no attribute '_hierarchical_fast' -from . import _hierarchical_fast as _hierarchical +from . import _hierarchical_fast as _hierarchical # type: ignore[attr-defined] from ._feature_agglomeration import AgglomerationTransform ############################################################################### diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2c35fc0cf0277..efe9149204bbf 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -808,7 +808,7 @@ def multilabel_confusion_matrix( prefer_skip_nested_validation=True, ) # TODO(1.9): Change default value for `replace_undefined_by` param to 0.0 and remove -# FutureWarnings. +# FutureWarnings; also the defaults in the warning messages need to be updated. def cohen_kappa_score( y1, y2, @@ -927,9 +927,9 @@ class labels [2]_. replace_undefined_by = np.nan warnings.warn(mgs_changing_default, FutureWarning) msg = ( - "`y2` contains no labels that are presented in both `y1` and " - "`labels`. cohen_kappa_score is undefined and set to the value defined in " - "the `replace_undefined_by` param, which defaults to 0.0." + "`y2` contains no labels that are presented in both `y1` and `labels`." + "cohen_kappa_score is undefined and set to the value defined in " + "the `replace_undefined_by` param, which defaults to `np.nan`." ) warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return replace_undefined_by @@ -955,7 +955,7 @@ class labels [2]_. msg = ( "`y1`, `y2` and `labels` have only one label in common. " "cohen_kappa_score is undefined and set to the value defined in the " - "`replace_undefined_by` param, which defaults to 0.0." + "`replace_undefined_by` param, which defaults to `np.nan`." ) warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) return replace_undefined_by diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index b8a4cf7921644..855129b85ba64 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1003,7 +1003,7 @@ def test_cohen_kappa_zero_division_warning(): y2 = np.array([3] * 10) with pytest.warns( UndefinedMetricWarning, - match="`y2` does not contain any label that is also both present in", + match="`y2` contains no labels that are presented in both `y1` and `labels`.", ): cohen_kappa_score(y1, y2, labels=labels) @@ -1013,7 +1013,7 @@ def test_cohen_kappa_zero_division_warning(): y2 = np.array([1] * 5 + [3] * 5) with pytest.warns( UndefinedMetricWarning, - match="`y1` and `y2` only have one label in common that is also in `labels`.", + match="`y1`, `y2` and `labels` have only one label in common.", ): cohen_kappa_score(y1, y2, labels=labels) From 2ee10a39d9d1cb0ce856314a6c78dfac1f2ab312 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Wed, 30 Apr 2025 20:18:56 +0200 Subject: [PATCH 12/14] leaner test --- sklearn/metrics/tests/test_classification.py | 76 ++++++++------------ 1 file changed, 29 insertions(+), 47 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 855129b85ba64..574445008b02c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -926,65 +926,47 @@ def test_cohen_kappa(): ) +@pytest.mark.parametrize( + "test_case", + [ + # empty inputs: + ([], [], None, None), + # annotator y2 does not assign any label specified in `labels` (note: also + # applicable if `labels` is default and `y2` does not contain any label that is + # in `y1`): + ([1] * 5 + [2] * 5, [3] * 10, [1, 2], None), + # both inputs (`y1` and `y2`) only have one label: + ([3] * 10, [3] * 10, None, None), + # both inputs only have one label in common that is also in `labels`: + ([1] * 5 + [2] * 5, [3] * 10, [1, 2], None), + # like the last test case, but with `weights="linear"` (note that + # weights="linear" and weights="quadratic" are different branches, though the + # latter is so similar to the former that the test case is skipped here): + ([1] * 5 + [2] * 5, [3] * 10, [1, 2], "linear"), + ], +) @pytest.mark.parametrize("replace_undefined_by", [0.0, np.nan]) -def test_cohen_kappa_zero_division(replace_undefined_by): +def test_cohen_kappa_zero_division(test_case, replace_undefined_by): """Test that cohen_kappa_score handles divisions by 0 correctly by returning the - `replace_undefined_by` param. (The fist two tests cover the first possible location - in the function for an occurrence of a division by zero, the second two tests in the - the second possible location in the function.""" + `replace_undefined_by` param. (The fist two test cases cover the first possible + location in the function for an occurrence of a division by zero, the last three + test cases cover a zero division in the the second possible location in the + function.""" - def check_equal(res, exp): + def _check_equal(res, exp): if np.isnan(res) and np.isnan(exp): return True return res == exp - # test case: empty inputs - y1 = np.array([]) - y2 = np.array([]) - assert check_equal( - cohen_kappa_score(y1, y2, replace_undefined_by=replace_undefined_by), - replace_undefined_by, - ) + y1, y2, labels, weights = test_case + y1, y2 = np.array(y1), np.array(y2) - # test case: annotator y2 does not assign any label specified in `labels` (note: - # also applicable if labels is default and y2 does not contain any label that is in - # y1) - labels = [1, 2] - y1 = np.array([1] * 5 + [2] * 5) - y2 = np.array([3] * 10) - assert check_equal( - cohen_kappa_score( - y1, y2, labels=labels, replace_undefined_by=replace_undefined_by - ), - replace_undefined_by, - ) - - # test case: both inputs only have one label - y1 = np.array([3] * 10) - y2 = np.array([3] * 10) - assert check_equal( - cohen_kappa_score(y1, y2, replace_undefined_by=replace_undefined_by), - replace_undefined_by, - ) - - # test case: both inputs only have one label in common that is also in `labels` - # (note: weights="linear" and weights="quadratic" are different branches, though the - # latter is so similar to the former that the test is skipped here) - labels = [1, 2] - y1 = np.array([1] * 5 + [2] * 5) - y2 = np.array([1] * 5 + [3] * 5) - assert check_equal( - cohen_kappa_score( - y1, y2, labels=labels, replace_undefined_by=replace_undefined_by - ), - replace_undefined_by, - ) - assert check_equal( + assert _check_equal( cohen_kappa_score( y1, y2, labels=labels, - weights="linear", + weights=weights, replace_undefined_by=replace_undefined_by, ), replace_undefined_by, From 13af4c8b2cb4b9f59b737f58c31e597107c86670 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Mon, 5 May 2025 12:11:03 +0200 Subject: [PATCH 13/14] Apply suggestions from code review Co-authored-by: Adrin Jalali --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index efe9149204bbf..8b5bd45ee488a 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -914,7 +914,7 @@ class labels [2]_. sum1 = np.sum(confusion, axis=1) mgs_changing_default = ( - "The default return value of `cohen_kappa_score` in case of a division " + "`np.nan` as the default return value of `cohen_kappa_score` in case of a division " "by zero has been deprecated in 1.7 and will be changed to 0.0 in version " "1.9. Set `replace_undefined_by=0.0` to use the new default and to silence " "this Warning." From 703eaae958e005ca85376ebb04ce857c3b984640 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Mon, 5 May 2025 12:51:47 +0200 Subject: [PATCH 14/14] deal with zero division in helper function --- sklearn/metrics/_classification.py | 51 +++++++++++--------- sklearn/metrics/tests/test_classification.py | 4 +- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 8b5bd45ee488a..fd2c375abf3a8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -897,6 +897,15 @@ class labels [2]_. >>> cohen_kappa_score(y1, y2) 0.6875 """ + + def _check_zero_division(denominator, replace_undefined_by, msg): + if np.isclose(denominator, 0): + if replace_undefined_by == "deprecated": + replace_undefined_by = np.nan + warnings.warn(mgs_changing_default, FutureWarning) + warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) + return True + try: confusion = confusion_matrix(y1, y2, labels=labels, sample_weight=sample_weight) except ValueError as e: @@ -914,25 +923,22 @@ class labels [2]_. sum1 = np.sum(confusion, axis=1) mgs_changing_default = ( - "`np.nan` as the default return value of `cohen_kappa_score` in case of a division " - "by zero has been deprecated in 1.7 and will be changed to 0.0 in version " - "1.9. Set `replace_undefined_by=0.0` to use the new default and to silence " - "this Warning." + "`np.nan` as the default return value of `cohen_kappa_score` in case of a " + "division by zero has been deprecated in 1.7 and will be changed to 0.0 in " + "version 1.9. Set `replace_undefined_by=0.0` to use the new default and to " + "silence this Warning." ) numerator = np.outer(sum0, sum1) denominator = np.sum(sum0) - if np.isclose(denominator, 0): - if replace_undefined_by == "deprecated": - replace_undefined_by = np.nan - warnings.warn(mgs_changing_default, FutureWarning) - msg = ( - "`y2` contains no labels that are presented in both `y1` and `labels`." - "cohen_kappa_score is undefined and set to the value defined in " - "the `replace_undefined_by` param, which defaults to `np.nan`." - ) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) + msg_zero_division = ( + "`y2` contains no labels that are presented in both `y1` and `labels`." + "`cohen_kappa_score` is undefined and set to the value defined by " + "the `replace_undefined_by` param, which defaults to `np.nan`." + ) + if _check_zero_division(denominator, replace_undefined_by, msg_zero_division): return replace_undefined_by + expected = numerator / denominator if weights is None: @@ -948,17 +954,14 @@ class labels [2]_. numerator = np.sum(w_mat * confusion) denominator = np.sum(w_mat * expected) - if np.isclose(denominator, 0): - if replace_undefined_by == "deprecated": - replace_undefined_by = np.nan - warnings.warn(mgs_changing_default, FutureWarning) - msg = ( - "`y1`, `y2` and `labels` have only one label in common. " - "cohen_kappa_score is undefined and set to the value defined in the " - "`replace_undefined_by` param, which defaults to `np.nan`." - ) - warnings.warn(msg, UndefinedMetricWarning, stacklevel=2) + msg_zero_division = ( + "`y1`, `y2` and `labels` have only one label in common. " + "`cohen_kappa_score` is undefined and set to the value defined by the " + "`replace_undefined_by` param, which defaults to `np.nan`." + ) + if _check_zero_division(denominator, replace_undefined_by, msg_zero_division): return replace_undefined_by + k = numerator / denominator return float(1 - k) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 574445008b02c..da163aaef5016 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1010,7 +1010,7 @@ def test_cohen_kappa_score_raise_warning_deprecation(): y2 = np.array([3] * 10) with pytest.warns( FutureWarning, - match="The default return value of `cohen_kappa_score` in case of a division", + match="`np.nan` as the default return value of `cohen_kappa_score` in case of", ): cohen_kappa_score(y1, y2, labels=labels) @@ -1020,7 +1020,7 @@ def test_cohen_kappa_score_raise_warning_deprecation(): y2 = np.array([1] * 5 + [3] * 5) with pytest.warns( FutureWarning, - match="The default return value of `cohen_kappa_score` in case of a division", + match="`np.nan` as the default return value of `cohen_kappa_score` in case of", ): cohen_kappa_score(y1, y2, labels=labels)