From 222110fa12c2ce81c94d76a02919d8e536f4549c Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 10 May 2024 18:02:29 +0200 Subject: [PATCH 1/9] check_scoring with raise_exc --- sklearn/feature_selection/_rfe.py | 2 +- sklearn/linear_model/_logistic.py | 4 ++-- sklearn/metrics/_scorer.py | 23 +++++++++++++++---- .../_classification_threshold.py | 2 +- sklearn/model_selection/_search.py | 2 +- sklearn/model_selection/_validation.py | 14 ++++------- 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 7c5cd8d45b8d1..a0f4b74a44671 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -539,7 +539,7 @@ class RFECV(RFE): ``cv`` default value of None changed from 3-fold to 5-fold. scoring : str, callable or None, default=None - A string (see model evaluation documentation) or + A string (see :ref:`scoring_parameter`) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 055ccc1c6a202..e8755c659af6e 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -636,7 +636,7 @@ def _log_reg_scoring_path( values are chosen in a logarithmic scale between 1e-4 and 1e4. scoring : callable - A string (see model evaluation documentation) or + A string (see :ref:`scoring_parameter`) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. @@ -1521,7 +1521,7 @@ class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstima solver. scoring : str or callable, default=None - A string (see model evaluation documentation) or + A string (see :ref:`scoring_parameter`) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. The diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index bc9d8ab3d651a..61c278c8dcf5c 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -955,10 +955,11 @@ def get_scorer_names(): None, ], "allow_none": ["boolean"], + "raise_exc": ["boolean"], }, prefer_skip_nested_validation=True, ) -def check_scoring(estimator=None, scoring=None, *, allow_none=False): +def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=True): """Determine scorer from user options. A TypeError will be thrown if the estimator cannot be scored. @@ -969,7 +970,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False): The object to use to fit the data. If `None`, then this function may error depending on `allow_none`. - scoring : str, callable, list, tuple, or dict, default=None + scoring : str, callable, list, tuple, set, or dict, default=None Scorer to use. If `scoring` represents a single score, one can use: - a single string (see :ref:`scoring_parameter`); @@ -985,8 +986,20 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False): If None, the provided estimator object's `score` method is used. allow_none : bool, default=False - If no scoring is specified and the estimator has no score function, we - can either return None or raise an exception. + Whether to return None or raise an error if no `scoring` is specified and the + estimator has no `score` method. 
+ + raise_exc : bool, default=True + Whether to raise an exception if a subset of the scorers in multimetric scoring + fails or return an error code. + + - If set to `True` raises the failing scorer's exception. + + - If set to `False` a formatted string of the exception details is passed as + result of the failing scorer(s). + + This applies if `scoring` is list, tuple, set, or dict. Ignored if `scoring` is + a str or a callable. Returns ------- @@ -1026,7 +1039,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False): return get_scorer(scoring) if isinstance(scoring, (list, tuple, set, dict)): scorers = _check_multimetric_scoring(estimator, scoring=scoring) - return _MultimetricScorer(scorers=scorers) + return _MultimetricScorer(scorers=scorers, raise_exc=raise_exc) if scoring is None: if hasattr(estimator, "score"): return _PassthroughScorer(estimator) diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index d5a864da10653..8463bbadee18e 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -636,7 +636,7 @@ class TunedThresholdClassifierCV(BaseThresholdClassifier): The objective metric to be optimized. Can be one of: * a string associated to a scoring function for binary classification - (see model evaluation documentation); + (see :ref:`scoring_parameter`); * a scorer callable object created with :func:`~sklearn.metrics.make_scorer`; response_method : {"auto", "decision_function", "predict_proba"}, default="auto" diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index a26ec0786849d..d0dc83dc871d6 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -972,7 +972,7 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): first_test_score = all_out[0]["test_scores"] self.multimetric_ = isinstance(first_test_score, dict) - # check refit_metric now for a callabe scorer that is multimetric + # check refit_metric now for a callable scorer that is multimetric if callable(self.scoring) and self.multimetric_: self._check_refit_for_multimetric(first_test_score) refit_metric = self.refit diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 176627ace91d4..28780361bb6f1 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -27,7 +27,7 @@ from ..base import clone, is_classifier from ..exceptions import FitFailedWarning, UnsetMetadataPassedError from ..metrics import check_scoring, get_scorer_names -from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer +from ..metrics._scorer import _MultimetricScorer from ..preprocessing import LabelEncoder from ..utils import Bunch, _safe_indexing, check_random_state, indexable from ..utils._param_validation import ( @@ -352,15 +352,9 @@ def cross_validate( cv = check_cv(cv, y, classifier=is_classifier(estimator)) - if callable(scoring): - scorers = scoring - elif scoring is None or isinstance(scoring, str): - scorers = check_scoring(estimator, scoring) - else: - scorers = _check_multimetric_scoring(estimator, scoring) - scorers = _MultimetricScorer( - scorers=scorers, raise_exc=(error_score == "raise") - ) + scorers = check_scoring( + estimator, scoring=scoring, raise_exc=(error_score == "raise") + ) if _routing_enabled(): # For estimators, a MetadataRouter is created in get_metadata_routing From 
7a657ab36466aa9380a806fb4873dcc6a000c18c Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:32:27 +0200 Subject: [PATCH 2/9] add test --- doc/whats_new/v1.6.rst | 9 ++++++- sklearn/metrics/_scorer.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 28 +++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 6eda6717b3d1b..1878a95b8abf9 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -38,7 +38,7 @@ See :ref:`array_api` for more details. **Classes:** -- +- Changelog --------- @@ -54,6 +54,13 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123455 is the *pull request* number, not the issue number. +:mod:`sklearn.metrics` +...................... + +- |Enhancement| :func:`sklearn.metrics.check_scoring` now accepts `raise_exc` to specify + whether to raise an exception if a subset of the scorers in multimetric scoring fails + or to return an error code. :pr:`28992` by :user:`Stefanie Senger `. + Thanks to everyone who has contributed to the maintenance and improvement of the project since version 1.5, including: diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 61c278c8dcf5c..65c3f85eed481 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -991,7 +991,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T raise_exc : bool, default=True Whether to raise an exception if a subset of the scorers in multimetric scoring - fails or return an error code. + fails or to return an error code as a str for the failing scorer(s). - If set to `True` raises the failing scorer's exception. diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 9960c32fc3938..bfe8f57f92265 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1557,6 +1557,34 @@ def test_multimetric_scorer_repr(): assert str(multi_metric_scorer) == 'MultiMetricScorer("accuracy", "r2")' +def test_check_scoring_multimetric_raise_exc(): + """Test that check_scoring returns error code for a subset of scorers in + multimetric scoring if raise_exc=False and raises otherwise.""" + + def raising_scorer(estimator, X, y): + raise ValueError("That doesn't work.") + + X, y = make_classification(n_samples=150, n_features=10, random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + clf = LogisticRegression().fit(X_train, y_train) + + # "raising_scorer" is raising ValueError and should return an string representation + # of the error of the last scorer: + scoring = { + "accuracy": make_scorer(accuracy_score), + "raising_scorer": raising_scorer, + } + scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False) + scores = scoring_call(clf, X_test, y_test) + assert "That doesn't work." in scores["raising_scorer"] + + # should raise an error + scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=True) + err_msg = "That doesn't work." 
+ with pytest.raises(ValueError, match=err_msg): + scores = scoring_call(clf, X_test, y_test) + + @pytest.mark.parametrize("enable_metadata_routing", [True, False]) def test_metadata_routing_multimetric_metadata_routing(enable_metadata_routing): """Test multimetric scorer works with and without metadata routing enabled when From d1efd107dcadc8467337ab0b7259b8ee09b12cdc Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:44:08 +0200 Subject: [PATCH 3/9] mention set --- sklearn/metrics/_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 65c3f85eed481..cbc7a258918f5 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -978,7 +978,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T If `scoring` represents multiple scores, one can use: - - a list or tuple of unique strings; + - a list, tuple or set of unique strings; - a callable returning a dictionary where the keys are the metric names and the values are the metric scorers; - a dictionary with metric names as keys and callables a values. From 2cef99d365aa28227bfbfd17022e5a692d20461e Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:48:39 +0200 Subject: [PATCH 4/9] documentation: add signature of callables --- sklearn/metrics/_scorer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index cbc7a258918f5..6054d657d36f3 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -981,7 +981,8 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T - a list, tuple or set of unique strings; - a callable returning a dictionary where the keys are the metric names and the values are the metric scorers; - - a dictionary with metric names as keys and callables a values. + - a dictionary with metric names as keys and callables a values. The callables + need to have the signature `callable(estimator, X, y)`. If None, the provided estimator object's `score` method is used. From 9cd74d948f562ef3e71c3d5b51625cc7b46fa9b6 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:58:01 +0200 Subject: [PATCH 5/9] fix docstring --- sklearn/metrics/_scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 6054d657d36f3..829e6db4c3501 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -982,7 +982,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T - a callable returning a dictionary where the keys are the metric names and the values are the metric scorers; - a dictionary with metric names as keys and callables a values. The callables - need to have the signature `callable(estimator, X, y)`. + need to have the signature `callable(estimator, X, y)`. If None, the provided estimator object's `score` method is used. @@ -992,7 +992,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T raise_exc : bool, default=True Whether to raise an exception if a subset of the scorers in multimetric scoring - fails or to return an error code as a str for the failing scorer(s). + fails or to return an error code. - If set to `True` raises the failing scorer's exception. 
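At this point in the series the new keyword is wired through check_scoring end to end. Below is a minimal usage sketch (not part of the patches themselves) of what raise_exc=False does for multimetric scoring. It assumes a scikit-learn build that already includes these changes (1.6+); the failing_scorer helper is a hypothetical stand-in modelled on the scorer used in the test added in PATCH 2/9.

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, check_scoring, make_scorer


    def failing_scorer(estimator, X, y):
        # Hypothetical scorer that always fails, modelled on the test in PATCH 2/9.
        raise ValueError("That doesn't work.")


    X, y = make_classification(n_samples=150, n_features=10, random_state=0)
    clf = LogisticRegression().fit(X, y)
    scoring = {"accuracy": make_scorer(accuracy_score), "failing": failing_scorer}

    # With raise_exc=False the failing scorer's exception is returned as a
    # formatted traceback string in the result dict, so the other scores stay usable.
    scorer = check_scoring(clf, scoring=scoring, raise_exc=False)
    scores = scorer(clf, X, y)
    print(scores["accuracy"])   # a float, e.g. close to 1.0 on the training data
    print(scores["failing"])    # "Traceback (most recent call last): ... That doesn't work."

    # With the default raise_exc=True, the same call re-raises the ValueError instead.
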
From f4b68b73be97b2145be001fc400c732c3ad2e03e Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Tue, 21 May 2024 09:53:32 +0200 Subject: [PATCH 6/9] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_scorer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 829e6db4c3501..552853e182281 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -994,13 +994,14 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T Whether to raise an exception if a subset of the scorers in multimetric scoring fails or to return an error code. - - If set to `True` raises the failing scorer's exception. - - - If set to `False` a formatted string of the exception details is passed as + - If set to `True`, raises the failing scorer's exception. + - If set to `False`, a formatted string of the exception details is passed as result of the failing scorer(s). This applies if `scoring` is list, tuple, set, or dict. Ignored if `scoring` is a str or a callable. + + .. versionadded:: 1.6 Returns ------- From 549e764bd24824835ac078b3765a5e9bae50b46a Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 21 May 2024 14:22:10 +0200 Subject: [PATCH 7/9] add example --- sklearn/metrics/_scorer.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 552853e182281..cb6eec94d464f 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -979,8 +979,8 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T If `scoring` represents multiple scores, one can use: - a list, tuple or set of unique strings; - - a callable returning a dictionary where the keys are the metric - names and the values are the metric scorers; + - a callable returning a dictionary where the keys are the metric names and the + values are the metric scorers; - a dictionary with metric names as keys and callables a values. The callables need to have the signature `callable(estimator, X, y)`. @@ -1000,14 +1000,13 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T This applies if `scoring` is list, tuple, set, or dict. Ignored if `scoring` is a str or a callable. - + .. versionadded:: 1.6 Returns ------- scoring : callable - A scorer callable object / function with signature - ``scorer(estimator, X, y)``. + A scorer callable object / function with signature ``scorer(estimator, X, y)``. Examples -------- @@ -1019,6 +1018,22 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T >>> scorer = check_scoring(classifier, scoring='accuracy') >>> scorer(classifier, X, y) 0.96... + + >>> from sklearn.metrics import make_scorer, accuracy_score, mean_squared_log_error + >>> X, y = load_iris(return_X_y=True) + >>> y *= -1 + >>> clf = DecisionTreeClassifier().fit(X, y) + >>> scoring = { + >>> "accuracy": make_scorer(accuracy_score), + >>> "mean_squared_log_error": make_scorer(mean_squared_log_error), + >>> } + >>> scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False) + >>> scores = scoring_call(clf, X, y) + >>> scores + {'accuracy': 1.0, + 'mean_squared_log_error': 'Traceback (most recent call last): ... 
raise \ + ValueError(ValueError: Mean Squared Logarithmic Error cannot be used when targets \ + contain negative values.'} # doctest: +SKIP """ if isinstance(scoring, str): return get_scorer(scoring) From 395d0aead8a032a1b089579f9fbbbf1bac3ddf71 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Tue, 21 May 2024 15:37:00 +0200 Subject: [PATCH 8/9] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_scorer.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index cb6eec94d464f..4b83801e393a7 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1024,16 +1024,13 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T >>> y *= -1 >>> clf = DecisionTreeClassifier().fit(X, y) >>> scoring = { - >>> "accuracy": make_scorer(accuracy_score), - >>> "mean_squared_log_error": make_scorer(mean_squared_log_error), - >>> } + ... "accuracy": make_scorer(accuracy_score), + ... "mean_squared_log_error": make_scorer(mean_squared_log_error), + ... } >>> scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False) >>> scores = scoring_call(clf, X, y) >>> scores - {'accuracy': 1.0, - 'mean_squared_log_error': 'Traceback (most recent call last): ... raise \ - ValueError(ValueError: Mean Squared Logarithmic Error cannot be used when targets \ - contain negative values.'} # doctest: +SKIP + {'accuracy': 1.0, 'mean_squared_log_error': 'Traceback ...'} """ if isinstance(scoring, str): return get_scorer(scoring) From d9bf3a4535aadcbef1f254ced130d37ade4e71a8 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 21 May 2024 15:39:40 +0200 Subject: [PATCH 9/9] add parenthesis --- sklearn/metrics/_scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 4b83801e393a7..c1a916aa0b5f3 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -991,8 +991,8 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T estimator has no `score` method. raise_exc : bool, default=True - Whether to raise an exception if a subset of the scorers in multimetric scoring - fails or to return an error code. + Whether to raise an exception (if a subset of the scorers in multimetric scoring + fails) or to return an error code. - If set to `True`, raises the failing scorer's exception. - If set to `False`, a formatted string of the exception details is passed as