diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index 53b0eb017fc57..87814e102ad98 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -80,6 +80,13 @@ Changelog
 - |Enhancement| Added a function :func:`base.is_clusterer` which determines
   whether a given estimator is of category clusterer.
   :pr:`28936` by :user:`Christian Veenhuis `.
+
+:mod:`sklearn.metrics`
+......................
+
+- |Enhancement| :func:`sklearn.metrics.check_scoring` now accepts `raise_exc` to specify
+  whether to raise an exception if a subset of the scorers in multimetric scoring fails
+  or to return an error code. :pr:`28992` by :user:`Stefanie Senger `.
 
 Thanks to everyone who has contributed to the maintenance and improvement of
 the project since version 1.5, including:
diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py
index 7c5cd8d45b8d1..a0f4b74a44671 100644
--- a/sklearn/feature_selection/_rfe.py
+++ b/sklearn/feature_selection/_rfe.py
@@ -539,7 +539,7 @@ class RFECV(RFE):
             ``cv`` default value of None changed from 3-fold to 5-fold.
 
     scoring : str, callable or None, default=None
-        A string (see model evaluation documentation) or
+        A string (see :ref:`scoring_parameter`) or
         a scorer callable object / function with signature
         ``scorer(estimator, X, y)``.
 
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index 055ccc1c6a202..e8755c659af6e 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -636,7 +636,7 @@ def _log_reg_scoring_path(
         values are chosen in a logarithmic scale between 1e-4 and 1e4.
 
     scoring : callable
-        A string (see model evaluation documentation) or
+        A string (see :ref:`scoring_parameter`) or
         a scorer callable object / function with signature
         ``scorer(estimator, X, y)``. For a list of scoring functions
         that can be used, look at :mod:`sklearn.metrics`.
@@ -1521,7 +1521,7 @@ class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstima
         solver.
 
     scoring : str or callable, default=None
-        A string (see model evaluation documentation) or
+        A string (see :ref:`scoring_parameter`) or
         a scorer callable object / function with signature
         ``scorer(estimator, X, y)``. For a list of scoring functions
         that can be used, look at :mod:`sklearn.metrics`. The
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index bc9d8ab3d651a..c1a916aa0b5f3 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -955,10 +955,11 @@ def get_scorer_names():
             None,
         ],
         "allow_none": ["boolean"],
+        "raise_exc": ["boolean"],
     },
     prefer_skip_nested_validation=True,
 )
-def check_scoring(estimator=None, scoring=None, *, allow_none=False):
+def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=True):
     """Determine scorer from user options.
 
     A TypeError will be thrown if the estimator cannot be scored.
@@ -969,7 +970,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False):
         The object to use to fit the data. If `None`, then this function may
         error depending on `allow_none`.
 
-    scoring : str, callable, list, tuple, or dict, default=None
+    scoring : str, callable, list, tuple, set, or dict, default=None
         Scorer to use. If `scoring` represents a single score, one can use:
 
         - a single string (see :ref:`scoring_parameter`);
@@ -977,22 +978,35 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False):
 
         If `scoring` represents multiple scores, one can use:
 
-        - a list or tuple of unique strings;
-        - a callable returning a dictionary where the keys are the metric
-          names and the values are the metric scorers;
-        - a dictionary with metric names as keys and callables a values.
+        - a list, tuple or set of unique strings;
+        - a callable returning a dictionary where the keys are the metric names and the
+          values are the metric scorers;
+        - a dictionary with metric names as keys and callables as values. The callables
+          need to have the signature `callable(estimator, X, y)`.
 
         If None, the provided estimator object's `score` method is used.
 
     allow_none : bool, default=False
-        If no scoring is specified and the estimator has no score function, we
-        can either return None or raise an exception.
+        Whether to return None or raise an error if no `scoring` is specified and the
+        estimator has no `score` method.
+
+    raise_exc : bool, default=True
+        Whether to raise an exception (if a subset of the scorers in multimetric scoring
+        fails) or to return an error code.
+
+        - If set to `True`, raises the failing scorer's exception.
+        - If set to `False`, a formatted string of the exception details is returned as
+          the result of the failing scorer(s).
+
+        This applies if `scoring` is a list, tuple, set, or dict. Ignored if `scoring`
+        is a str or a callable.
+
+        .. versionadded:: 1.6
 
     Returns
     -------
     scoring : callable
-        A scorer callable object / function with signature
-        ``scorer(estimator, X, y)``.
+        A scorer callable object / function with signature ``scorer(estimator, X, y)``.
 
     Examples
     --------
@@ -1004,6 +1018,19 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False):
     >>> scorer = check_scoring(classifier, scoring='accuracy')
     >>> scorer(classifier, X, y)
     0.96...
+
+    >>> from sklearn.metrics import make_scorer, accuracy_score, mean_squared_log_error
+    >>> X, y = load_iris(return_X_y=True)
+    >>> y *= -1
+    >>> clf = DecisionTreeClassifier().fit(X, y)
+    >>> scoring = {
+    ...     "accuracy": make_scorer(accuracy_score),
+    ...     "mean_squared_log_error": make_scorer(mean_squared_log_error),
+    ... }
+    >>> scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False)
+    >>> scores = scoring_call(clf, X, y)
+    >>> scores
+    {'accuracy': 1.0, 'mean_squared_log_error': 'Traceback ...'}
     """
     if isinstance(scoring, str):
         return get_scorer(scoring)
@@ -1026,7 +1053,7 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False):
         return get_scorer(scoring)
     if isinstance(scoring, (list, tuple, set, dict)):
         scorers = _check_multimetric_scoring(estimator, scoring=scoring)
-        return _MultimetricScorer(scorers=scorers)
+        return _MultimetricScorer(scorers=scorers, raise_exc=raise_exc)
     if scoring is None:
         if hasattr(estimator, "score"):
             return _PassthroughScorer(estimator)
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 9960c32fc3938..bfe8f57f92265 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -1557,6 +1557,34 @@ def test_multimetric_scorer_repr():
     assert str(multi_metric_scorer) == 'MultiMetricScorer("accuracy", "r2")'
 
 
+def test_check_scoring_multimetric_raise_exc():
+    """Test that check_scoring returns an error code for a subset of scorers in
+    multimetric scoring if raise_exc=False and raises otherwise."""
+
+    def raising_scorer(estimator, X, y):
+        raise ValueError("That doesn't work.")
+
+    X, y = make_classification(n_samples=150, n_features=10, random_state=0)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+    clf = LogisticRegression().fit(X_train, y_train)
+
+    # `raising_scorer` raises a ValueError; with `raise_exc=False` the result should
+    # be a string representation of that scorer's error:
+    scoring = {
+        "accuracy": make_scorer(accuracy_score),
+        "raising_scorer": raising_scorer,
+    }
+    scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=False)
+    scores = scoring_call(clf, X_test, y_test)
+    assert "That doesn't work." in scores["raising_scorer"]
+
+    # should raise an error
+    scoring_call = check_scoring(estimator=clf, scoring=scoring, raise_exc=True)
+    err_msg = "That doesn't work."
+    with pytest.raises(ValueError, match=err_msg):
+        scores = scoring_call(clf, X_test, y_test)
+
+
 @pytest.mark.parametrize("enable_metadata_routing", [True, False])
 def test_metadata_routing_multimetric_metadata_routing(enable_metadata_routing):
     """Test multimetric scorer works with and without metadata routing enabled when
diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py
index 1f891577b4680..e090a3d042746 100644
--- a/sklearn/model_selection/_classification_threshold.py
+++ b/sklearn/model_selection/_classification_threshold.py
@@ -639,7 +639,7 @@ class TunedThresholdClassifierCV(BaseThresholdClassifier):
         The objective metric to be optimized. Can be one of:
 
         * a string associated to a scoring function for binary classification
-          (see model evaluation documentation);
+          (see :ref:`scoring_parameter`);
         * a scorer callable object created with :func:`~sklearn.metrics.make_scorer`;
 
     response_method : {"auto", "decision_function", "predict_proba"}, default="auto"
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 856276229ba5c..edf492b84877a 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -972,7 +972,7 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None):
             first_test_score = all_out[0]["test_scores"]
             self.multimetric_ = isinstance(first_test_score, dict)
 
-            # check refit_metric now for a callabe scorer that is multimetric
+            # check refit_metric now for a callable scorer that is multimetric
             if callable(self.scoring) and self.multimetric_:
                 self._check_refit_for_multimetric(first_test_score)
                 refit_metric = self.refit
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 83d289d36efb2..e0858a2c4860a 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -27,7 +27,7 @@
 from ..base import clone, is_classifier
 from ..exceptions import FitFailedWarning, UnsetMetadataPassedError
 from ..metrics import check_scoring, get_scorer_names
-from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer
+from ..metrics._scorer import _MultimetricScorer
 from ..preprocessing import LabelEncoder
 from ..utils import Bunch, _safe_indexing, check_random_state, indexable
 from ..utils._param_validation import (
@@ -353,15 +353,9 @@ def cross_validate(
 
     cv = check_cv(cv, y, classifier=is_classifier(estimator))
 
-    if callable(scoring):
-        scorers = scoring
-    elif scoring is None or isinstance(scoring, str):
-        scorers = check_scoring(estimator, scoring)
-    else:
-        scorers = _check_multimetric_scoring(estimator, scoring)
-        scorers = _MultimetricScorer(
-            scorers=scorers, raise_exc=(error_score == "raise")
-        )
+    scorers = check_scoring(
+        estimator, scoring=scoring, raise_exc=(error_score == "raise")
+    )
 
     if _routing_enabled():
         # For estimators, a MetadataRouter is created in get_metadata_routing
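
Reviewer note: below is a minimal usage sketch of the new `raise_exc` flag, mirroring the doctest and test added above. It assumes this patch is applied (scikit-learn 1.6); the `failing_scorer` name and its error message are illustrative only and not part of the patch.

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, check_scoring, make_scorer

    def failing_scorer(estimator, X, y):
        # A scorer that always fails, used only to exercise raise_exc.
        raise ValueError("scoring failed on purpose")

    X, y = make_classification(n_samples=150, n_features=10, random_state=0)
    clf = LogisticRegression().fit(X, y)
    scoring = {"accuracy": make_scorer(accuracy_score), "failing": failing_scorer}

    # raise_exc=False: the failing scorer's traceback is returned as a string,
    # while the remaining scorers are still evaluated.
    scorer = check_scoring(clf, scoring=scoring, raise_exc=False)
    scores = scorer(clf, X, y)
    print(scores["accuracy"])  # a float
    print(scores["failing"])   # 'Traceback (most recent call last): ...'

    # raise_exc=True (the default): the failing scorer's exception propagates.
    scorer = check_scoring(clf, scoring=scoring, raise_exc=True)
    # scorer(clf, X, y)  # would raise ValueError("scoring failed on purpose")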
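
A similar sketch for the `cross_validate` simplification: the refactor keeps the mapping `raise_exc=(error_score == "raise")`, so the existing `error_score` semantics should be preserved, with a failing scorer replaced by `error_score` (plus a warning) rather than aborting the run. The `broken_scorer` name is made up for illustration, and this assumes the surrounding `_fit_and_score`/`_score` handling is unchanged by the patch.

    import numpy as np

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import cross_validate

    def broken_scorer(estimator, X, y):
        # A scorer that always fails, used only for illustration.
        raise ValueError("broken on purpose")

    X, y = make_classification(n_samples=150, n_features=10, random_state=0)
    clf = LogisticRegression()
    scoring = {"accuracy": make_scorer(accuracy_score), "broken": broken_scorer}

    # error_score=np.nan -> check_scoring(..., raise_exc=False): the failing
    # scorer yields NaN for every split (with a warning) and the run completes.
    cv_results = cross_validate(clf, X, y, scoring=scoring, error_score=np.nan)
    print(cv_results["test_accuracy"])  # regular accuracy scores per split
    print(cv_results["test_broken"])    # array of NaN

    # error_score="raise" -> raise_exc=True: the ValueError propagates.
    # cross_validate(clf, X, y, scoring=scoring, error_score="raise")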