From 222110fa12c2ce81c94d76a02919d8e536f4549c Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Fri, 10 May 2024 18:02:29 +0200 Subject: [PATCH 1/9] check_scoring with raise_exc --- sklearn/feature_selection/_rfe.py | 2 +- sklearn/linear_model/_logistic.py | 4 ++-- sklearn/metrics/_scorer.py | 23 +++++++++++++++---- .../_classification_threshold.py | 2 +- sklearn/model_selection/_search.py | 2 +- sklearn/model_selection/_validation.py | 14 ++++------- 6 files changed, 27 insertions(+), 20 deletions(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 7c5cd8d45b8d1..a0f4b74a44671 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -539,7 +539,7 @@ class RFECV(RFE): ``cv`` default value of None changed from 3-fold to 5-fold. scoring : str, callable or None, default=None - A string (see model evaluation documentation) or + A string (see :ref:`scoring_parameter`) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 055ccc1c6a202..e8755c659af6e 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -636,7 +636,7 @@ def _log_reg_scoring_path( values are chosen in a logarithmic scale between 1e-4 and 1e4. scoring : callable - A string (see model evaluation documentation) or + A string (see :ref:`scoring_parameter`) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. @@ -1521,7 +1521,7 @@ class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstima solver. scoring : str or callable, default=None - A string (see model evaluation documentation) or + A string (see :ref:`scoring_parameter`) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. For a list of scoring functions that can be used, look at :mod:`sklearn.metrics`. The diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index bc9d8ab3d651a..61c278c8dcf5c 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -955,10 +955,11 @@ def get_scorer_names(): None, ], "allow_none": ["boolean"], + "raise_exc": ["boolean"], }, prefer_skip_nested_validation=True, ) -def check_scoring(estimator=None, scoring=None, *, allow_none=False): +def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=True): """Determine scorer from user options. A TypeError will be thrown if the estimator cannot be scored. @@ -969,7 +970,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False): The object to use to fit the data. If `None`, then this function may error depending on `allow_none`. - scoring : str, callable, list, tuple, or dict, default=None + scoring : str, callable, list, tuple, set, or dict, default=None Scorer to use. If `scoring` represents a single score, one can use: - a single string (see :ref:`scoring_parameter`); @@ -985,8 +986,20 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False): If None, the provided estimator object's `score` method is used. allow_none : bool, default=False - If no scoring is specified and the estimator has no score function, we - can either return None or raise an exception. + Whether to return None or raise an error if no `scoring` is specified and the + estimator has no `score` method. 
+ + raise_exc : bool, default=True + Whether to raise an exception if a subset of the scorers in multimetric scoring + fails or return an error code. + + - If set to `True` raises the failing scorer's exception. + + - If set to `False` a formatted string of the exception details is passed as + result of the failing scorer(s). + + This applies if `scoring` is list, tuple, set, or dict. Ignored if `scoring` is + a str or a callable. Returns ------- @@ -1026,7 +1039,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False): return get_scorer(scoring) if isinstance(scoring, (list, tuple, set, dict)): scorers = _check_multimetric_scoring(estimator, scoring=scoring) - return _MultimetricScorer(scorers=scorers) + return _MultimetricScorer(scorers=scorers, raise_exc=raise_exc) if scoring is None: if hasattr(estimator, "score"): return _PassthroughScorer(estimator) diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index d5a864da10653..8463bbadee18e 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -636,7 +636,7 @@ class TunedThresholdClassifierCV(BaseThresholdClassifier): The objective metric to be optimized. Can be one of: * a string associated to a scoring function for binary classification - (see model evaluation documentation); + (see :ref:`scoring_parameter`); * a scorer callable object created with :func:`~sklearn.metrics.make_scorer`; response_method : {"auto", "decision_function", "predict_proba"}, default="auto" diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index a26ec0786849d..d0dc83dc871d6 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -972,7 +972,7 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): first_test_score = all_out[0]["test_scores"] self.multimetric_ = isinstance(first_test_score, dict) - # check refit_metric now for a callabe scorer that is multimetric + # check refit_metric now for a callable scorer that is multimetric if callable(self.scoring) and self.multimetric_: self._check_refit_for_multimetric(first_test_score) refit_metric = self.refit diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 176627ace91d4..28780361bb6f1 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -27,7 +27,7 @@ from ..base import clone, is_classifier from ..exceptions import FitFailedWarning, UnsetMetadataPassedError from ..metrics import check_scoring, get_scorer_names -from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer +from ..metrics._scorer import _MultimetricScorer from ..preprocessing import LabelEncoder from ..utils import Bunch, _safe_indexing, check_random_state, indexable from ..utils._param_validation import ( @@ -352,15 +352,9 @@ def cross_validate( cv = check_cv(cv, y, classifier=is_classifier(estimator)) - if callable(scoring): - scorers = scoring - elif scoring is None or isinstance(scoring, str): - scorers = check_scoring(estimator, scoring) - else: - scorers = _check_multimetric_scoring(estimator, scoring) - scorers = _MultimetricScorer( - scorers=scorers, raise_exc=(error_score == "raise") - ) + scorers = check_scoring( + estimator, scoring=scoring, raise_exc=(error_score == "raise") + ) if _routing_enabled(): # For estimators, a MetadataRouter is created in get_metadata_routing From 
7a657ab36466aa9380a806fb4873dcc6a000c18c Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:32:27 +0200 Subject: [PATCH 2/9] add test --- doc/whats_new/v1.6.rst | 9 ++++++- sklearn/metrics/_scorer.py | 2 +- sklearn/metrics/tests/test_score_objects.py | 28 +++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst index 6eda6717b3d1b..1878a95b8abf9 100644 --- a/doc/whats_new/v1.6.rst +++ b/doc/whats_new/v1.6.rst @@ -38,7 +38,7 @@ See :ref:`array_api` for more details. **Classes:** -- +- Changelog --------- @@ -54,6 +54,13 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123455 is the *pull request* number, not the issue number. +:mod:`sklearn.metrics` +...................... + +- |Enhancement| :func:`sklearn.metrics.check_scoring` now accepts `raise_exc` to specify + whether to raise an exception if a subset of the scorers in multimetric scoring fails + or to return an error code. :pr:`28992` by :user:`Stefanie Senger `. + Thanks to everyone who has contributed to the maintenance and improvement of the project since version 1.5, including: diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 61c278c8dcf5c..65c3f85eed481 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -991,7 +991,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T raise_exc : bool, default=True Whether to raise an exception if a subset of the scorers in multimetric scoring - fails or return an error code. + fails or to return an error code as a str for the failing scorer(s). - If set to `True` raises the failing scorer's exception. diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 9960c32fc3938..bfe8f57f92265 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1557,6 +1557,34 @@ def test_multimetric_scorer_repr(): assert str(multi_metric_scorer) == 'MultiMetricScorer("accuracy", "r2")' +def test_check_scoring_multimetric_raise_exc(): + """Test that check_scoring returns error code for a subset of scorers in + multimetric scoring if raise_exc=False and raises otherwise.""" + + def raising_scorer(estimator, X, y): + raise ValueError("That doesn't work.") + + X, y = make_classification(n_samples=150, n_features=10, random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + clf = LogisticRegression().fit(X_train, y_train) + + # "raising_scorer" is raising ValueError and should return an string representation + # of the error of the last scorer: + scoring = { + "accuracy": make_scorer(accuracy_score), + "raising_scorer": raising_scorer, + } + scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False) + scores = scoring_call(clf, X_test, y_test) + assert "That doesn't work." in scores["raising_scorer"] + + # should raise an error + scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=True) + err_msg = "That doesn't work." 
+ with pytest.raises(ValueError, match=err_msg): + scores = scoring_call(clf, X_test, y_test) + + @pytest.mark.parametrize("enable_metadata_routing", [True, False]) def test_metadata_routing_multimetric_metadata_routing(enable_metadata_routing): """Test multimetric scorer works with and without metadata routing enabled when From d1efd107dcadc8467337ab0b7259b8ee09b12cdc Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:44:08 +0200 Subject: [PATCH 3/9] mention set --- sklearn/metrics/_scorer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 65c3f85eed481..cbc7a258918f5 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -978,7 +978,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T If `scoring` represents multiple scores, one can use: - - a list or tuple of unique strings; + - a list, tuple or set of unique strings; - a callable returning a dictionary where the keys are the metric names and the values are the metric scorers; - a dictionary with metric names as keys and callables a values. From 2cef99d365aa28227bfbfd17022e5a692d20461e Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:48:39 +0200 Subject: [PATCH 4/9] documentation: add signature of callables --- sklearn/metrics/_scorer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index cbc7a258918f5..6054d657d36f3 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -981,7 +981,8 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T - a list, tuple or set of unique strings; - a callable returning a dictionary where the keys are the metric names and the values are the metric scorers; - - a dictionary with metric names as keys and callables a values. + - a dictionary with metric names as keys and callables a values. The callables + need to have the signature `callable(estimator, X, y)`. If None, the provided estimator object's `score` method is used. From 9cd74d948f562ef3e71c3d5b51625cc7b46fa9b6 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 14 May 2024 14:58:01 +0200 Subject: [PATCH 5/9] fix docstring --- sklearn/metrics/_scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 6054d657d36f3..829e6db4c3501 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -982,7 +982,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T - a callable returning a dictionary where the keys are the metric names and the values are the metric scorers; - a dictionary with metric names as keys and callables a values. The callables - need to have the signature `callable(estimator, X, y)`. + need to have the signature `callable(estimator, X, y)`. If None, the provided estimator object's `score` method is used. @@ -992,7 +992,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T raise_exc : bool, default=True Whether to raise an exception if a subset of the scorers in multimetric scoring - fails or to return an error code as a str for the failing scorer(s). + fails or to return an error code. - If set to `True` raises the failing scorer's exception. 
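At this point in the series the new keyword is wired through check_scoring end to end. Below is a minimal usage sketch (not part of the patches themselves) of what raise_exc=False does for multimetric scoring. It assumes a scikit-learn build that already includes these changes (1.6+); the failing_scorer helper is a hypothetical stand-in modelled on the scorer used in the test added in PATCH 2/9.

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, check_scoring, make_scorer


    def failing_scorer(estimator, X, y):
        # Hypothetical scorer that always fails, modelled on the test in PATCH 2/9.
        raise ValueError("That doesn't work.")


    X, y = make_classification(n_samples=150, n_features=10, random_state=0)
    clf = LogisticRegression().fit(X, y)
    scoring = {"accuracy": make_scorer(accuracy_score), "failing": failing_scorer}

    # With raise_exc=False the failing scorer's exception is returned as a
    # formatted traceback string in the result dict, so the other scores stay usable.
    scorer = check_scoring(clf, scoring=scoring, raise_exc=False)
    scores = scorer(clf, X, y)
    print(scores["accuracy"])   # a float, e.g. close to 1.0 on the training data
    print(scores["failing"])    # "Traceback (most recent call last): ... That doesn't work."

    # With the default raise_exc=True, the same call re-raises the ValueError instead.
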
From f4b68b73be97b2145be001fc400c732c3ad2e03e Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Tue, 21 May 2024 09:53:32 +0200 Subject: [PATCH 6/9] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_scorer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 829e6db4c3501..552853e182281 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -994,13 +994,14 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T Whether to raise an exception if a subset of the scorers in multimetric scoring fails or to return an error code. - - If set to `True` raises the failing scorer's exception. - - - If set to `False` a formatted string of the exception details is passed as + - If set to `True`, raises the failing scorer's exception. + - If set to `False`, a formatted string of the exception details is passed as result of the failing scorer(s). This applies if `scoring` is list, tuple, set, or dict. Ignored if `scoring` is a str or a callable. + + .. versionadded:: 1.6 Returns ------- From 549e764bd24824835ac078b3765a5e9bae50b46a Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 21 May 2024 14:22:10 +0200 Subject: [PATCH 7/9] add example --- sklearn/metrics/_scorer.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 552853e182281..cb6eec94d464f 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -979,8 +979,8 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T If `scoring` represents multiple scores, one can use: - a list, tuple or set of unique strings; - - a callable returning a dictionary where the keys are the metric - names and the values are the metric scorers; + - a callable returning a dictionary where the keys are the metric names and the + values are the metric scorers; - a dictionary with metric names as keys and callables a values. The callables need to have the signature `callable(estimator, X, y)`. @@ -1000,14 +1000,13 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T This applies if `scoring` is list, tuple, set, or dict. Ignored if `scoring` is a str or a callable. - + .. versionadded:: 1.6 Returns ------- scoring : callable - A scorer callable object / function with signature - ``scorer(estimator, X, y)``. + A scorer callable object / function with signature ``scorer(estimator, X, y)``. Examples -------- @@ -1019,6 +1018,22 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T >>> scorer = check_scoring(classifier, scoring='accuracy') >>> scorer(classifier, X, y) 0.96... + + >>> from sklearn.metrics import make_scorer, accuracy_score, mean_squared_log_error + >>> X, y = load_iris(return_X_y=True) + >>> y *= -1 + >>> clf = DecisionTreeClassifier().fit(X, y) + >>> scoring = { + >>> "accuracy": make_scorer(accuracy_score), + >>> "mean_squared_log_error": make_scorer(mean_squared_log_error), + >>> } + >>> scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False) + >>> scores = scoring_call(clf, X, y) + >>> scores + {'accuracy': 1.0, + 'mean_squared_log_error': 'Traceback (most recent call last): ... 
raise \ + ValueError(ValueError: Mean Squared Logarithmic Error cannot be used when targets \ + contain negative values.'} # doctest: +SKIP """ if isinstance(scoring, str): return get_scorer(scoring) From 395d0aead8a032a1b089579f9fbbbf1bac3ddf71 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Tue, 21 May 2024 15:37:00 +0200 Subject: [PATCH 8/9] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_scorer.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index cb6eec94d464f..4b83801e393a7 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -1024,16 +1024,13 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T >>> y *= -1 >>> clf = DecisionTreeClassifier().fit(X, y) >>> scoring = { - >>> "accuracy": make_scorer(accuracy_score), - >>> "mean_squared_log_error": make_scorer(mean_squared_log_error), - >>> } + ... "accuracy": make_scorer(accuracy_score), + ... "mean_squared_log_error": make_scorer(mean_squared_log_error), + ... } >>> scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False) >>> scores = scoring_call(clf, X, y) >>> scores - {'accuracy': 1.0, - 'mean_squared_log_error': 'Traceback (most recent call last): ... raise \ - ValueError(ValueError: Mean Squared Logarithmic Error cannot be used when targets \ - contain negative values.'} # doctest: +SKIP + {'accuracy': 1.0, 'mean_squared_log_error': 'Traceback ...'} """ if isinstance(scoring, str): return get_scorer(scoring) From d9bf3a4535aadcbef1f254ced130d37ade4e71a8 Mon Sep 17 00:00:00 2001 From: Stefanie Senger Date: Tue, 21 May 2024 15:39:40 +0200 Subject: [PATCH 9/9] add parenthesis --- sklearn/metrics/_scorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 4b83801e393a7..c1a916aa0b5f3 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -991,8 +991,8 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T estimator has no `score` method. raise_exc : bool, default=True - Whether to raise an exception if a subset of the scorers in multimetric scoring - fails or to return an error code. + Whether to raise an exception (if a subset of the scorers in multimetric scoring + fails) or to return an error code. - If set to `True`, raises the failing scorer's exception. - If set to `False`, a formatted string of the exception details is passed as