Skip to content

Commit 93bc20f

Browse files
authored
Raise an error when all fits fail in cross-validation or grid-search (#21026)
1 parent 0a88cf8 commit 93bc20f

File tree

6 files changed

+111
-46
lines changed

6 files changed

+111
-46
lines changed

doc/whats_new/v1.1.rst

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,19 @@ Changelog
5353
message when the solver does not support sparse matrices with int64 indices.
5454
:pr:`21093` by `Tom Dupre la Tour`_.
5555

56+
:mod:`sklearn.model_selection`
57+
..............................
58+
59+
- |Enhancement| raise an error during cross-validation when the fits for all the
60+
splits failed. Similarly raise an error during grid-search when the fits for
61+
all the models and all the splits failed. :pr:`21026` by :user:`Loïc Estève <lesteve>`.
62+
63+
:mod:`sklearn.pipeline`
64+
.......................
65+
66+
- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`.
67+
Setting a transformer to "passthrough" will pass the features unchanged.
68+
:pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.
5669

5770
:mod:`sklearn.utils`
5871
....................
@@ -69,13 +82,6 @@ Changelog
6982
:pr:`20880` by :user:`Guillaume Lemaitre <glemaitre>`
7083
and :user:`András Simon <simonandras>`.
7184

72-
:mod:`sklearn.pipeline`
73-
.......................
74-
75-
- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`.
76-
Setting a transformer to "passthrough" will pass the features unchanged.
77-
:pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.
78-
7985
Code and Documentation Contributors
8086
-----------------------------------
8187

sklearn/model_selection/_search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from ._validation import _aggregate_score_dicts
3232
from ._validation import _insert_error_scores
3333
from ._validation import _normalize_score_results
34-
from ._validation import _warn_about_fit_failures
34+
from ._validation import _warn_or_raise_about_fit_failures
3535
from ..exceptions import NotFittedError
3636
from joblib import Parallel
3737
from ..utils import check_random_state
@@ -865,7 +865,7 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None):
865865
"splits, got {}".format(n_splits, len(out) // n_candidates)
866866
)
867867

868-
_warn_about_fit_failures(out, self.error_score)
868+
_warn_or_raise_about_fit_failures(out, self.error_score)
869869

870870
# For callable self.scoring, the return type is only known after
871871
# calling. If the return type is a dictionary, the error scores

sklearn/model_selection/_validation.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from ..utils.metaestimators import _safe_split
3131
from ..metrics import check_scoring
3232
from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer
33-
from ..exceptions import FitFailedWarning, NotFittedError
33+
from ..exceptions import FitFailedWarning
3434
from ._split import check_cv
3535
from ..preprocessing import LabelEncoder
3636

@@ -283,7 +283,7 @@ def cross_validate(
283283
for train, test in cv.split(X, y, groups)
284284
)
285285

286-
_warn_about_fit_failures(results, error_score)
286+
_warn_or_raise_about_fit_failures(results, error_score)
287287

288288
# For callable scoring, the return type is only known after calling. If the
289289
# return type is a dictionary, the error scores can now be inserted with
@@ -327,9 +327,6 @@ def _insert_error_scores(results, error_score):
327327
elif successful_score is None:
328328
successful_score = result["test_scores"]
329329

330-
if successful_score is None:
331-
raise NotFittedError("All estimators failed to fit")
332-
333330
if isinstance(successful_score, dict):
334331
formatted_error = {name: error_score for name in successful_score}
335332
for i in failed_indices:
@@ -347,7 +344,7 @@ def _normalize_score_results(scores, scaler_score_key="score"):
347344
return {scaler_score_key: scores}
348345

349346

350-
def _warn_about_fit_failures(results, error_score):
347+
def _warn_or_raise_about_fit_failures(results, error_score):
351348
fit_errors = [
352349
result["fit_error"] for result in results if result["fit_error"] is not None
353350
]
@@ -361,15 +358,25 @@ def _warn_about_fit_failures(results, error_score):
361358
for error, n in fit_errors_counter.items()
362359
)
363360

364-
some_fits_failed_message = (
365-
f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n"
366-
"The score on these train-test partitions for these parameters"
367-
f" will be set to {error_score}.\n"
368-
"If these failures are not expected, you can try to debug them "
369-
"by setting error_score='raise'.\n\n"
370-
f"Below are more details about the failures:\n{fit_errors_summary}"
371-
)
372-
warnings.warn(some_fits_failed_message, FitFailedWarning)
361+
if num_failed_fits == num_fits:
362+
all_fits_failed_message = (
363+
f"\nAll the {num_fits} fits failed.\n"
364+
"It is very likely that your model is misconfigured.\n"
365+
"You can try to debug the error by setting error_score='raise'.\n\n"
366+
f"Below are more details about the failures:\n{fit_errors_summary}"
367+
)
368+
raise ValueError(all_fits_failed_message)
369+
370+
else:
371+
some_fits_failed_message = (
372+
f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n"
373+
"The score on these train-test partitions for these parameters"
374+
f" will be set to {error_score}.\n"
375+
"If these failures are not expected, you can try to debug them "
376+
"by setting error_score='raise'.\n\n"
377+
f"Below are more details about the failures:\n{fit_errors_summary}"
378+
)
379+
warnings.warn(some_fits_failed_message, FitFailedWarning)
373380

374381

375382
def cross_val_score(

sklearn/model_selection/tests/test_search.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
from sklearn.base import BaseEstimator, ClassifierMixin
3131
from sklearn.base import is_classifier
32-
from sklearn.exceptions import NotFittedError
3332
from sklearn.datasets import make_classification
3433
from sklearn.datasets import make_blobs
3534
from sklearn.datasets import make_multilabel_classification
@@ -1628,6 +1627,27 @@ def get_cand_scores(i):
16281627
assert gs.best_index_ != clf.FAILING_PARAMETER
16291628

16301629

1630+
def test_grid_search_classifier_all_fits_fail():
1631+
X, y = make_classification(n_samples=20, n_features=10, random_state=0)
1632+
1633+
clf = FailingClassifier()
1634+
1635+
gs = GridSearchCV(
1636+
clf,
1637+
[{"parameter": [FailingClassifier.FAILING_PARAMETER] * 3}],
1638+
error_score=0.0,
1639+
)
1640+
1641+
warning_message = re.compile(
1642+
"All the 15 fits failed.+"
1643+
"15 fits failed with the following error.+ValueError.+Failing classifier failed"
1644+
" as required",
1645+
flags=re.DOTALL,
1646+
)
1647+
with pytest.raises(ValueError, match=warning_message):
1648+
gs.fit(X, y)
1649+
1650+
16311651
def test_grid_search_failing_classifier_raise():
16321652
# GridSearchCV with on_error == 'raise' raises the error
16331653

@@ -2130,7 +2150,7 @@ def custom_scorer(est, X, y):
21302150
assert_allclose(gs.cv_results_["mean_test_acc"], [1, 1, 0.1])
21312151

21322152

2133-
def test_callable_multimetric_clf_all_fails():
2153+
def test_callable_multimetric_clf_all_fits_fail():
21342154
# Warns and raises when all estimator fails to fit.
21352155
def custom_scorer(est, X, y):
21362156
return {"acc": 1}
@@ -2141,16 +2161,20 @@ def custom_scorer(est, X, y):
21412161

21422162
gs = GridSearchCV(
21432163
clf,
2144-
[{"parameter": [2, 2, 2]}],
2164+
[{"parameter": [FailingClassifier.FAILING_PARAMETER] * 3}],
21452165
scoring=custom_scorer,
21462166
refit=False,
21472167
error_score=0.1,
21482168
)
21492169

2150-
with pytest.warns(
2151-
FitFailedWarning,
2152-
match="15 fits failed.+total of 15",
2153-
), pytest.raises(NotFittedError, match="All estimators failed to fit"):
2170+
individual_fit_error_message = "ValueError: Failing classifier failed as required"
2171+
error_message = re.compile(
2172+
"All the 15 fits failed.+your model is misconfigured.+"
2173+
f"{individual_fit_error_message}",
2174+
flags=re.DOTALL,
2175+
)
2176+
2177+
with pytest.raises(ValueError, match=error_message):
21542178
gs.fit(X, y)
21552179

21562180

sklearn/model_selection/tests/test_split.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1774,7 +1774,7 @@ def test_nested_cv():
17741774
LeaveOneOut(),
17751775
GroupKFold(n_splits=3),
17761776
StratifiedKFold(),
1777-
StratifiedGroupKFold(),
1777+
StratifiedGroupKFold(n_splits=3),
17781778
StratifiedShuffleSplit(n_splits=3, random_state=0),
17791779
]
17801780

sklearn/model_selection/tests/test_validation.py

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2130,38 +2130,66 @@ def test_fit_and_score_working():
21302130
assert result["parameters"] == fit_and_score_kwargs["parameters"]
21312131

21322132

2133+
class DataDependentFailingClassifier(BaseEstimator):
2134+
def __init__(self, max_x_value=None):
2135+
self.max_x_value = max_x_value
2136+
2137+
def fit(self, X, y=None):
2138+
num_values_too_high = (X > self.max_x_value).sum()
2139+
if num_values_too_high:
2140+
raise ValueError(
2141+
f"Classifier fit failed with {num_values_too_high} values too high"
2142+
)
2143+
2144+
def score(self, X=None, Y=None):
2145+
return 0.0
2146+
2147+
21332148
@pytest.mark.parametrize("error_score", [np.nan, 0])
2134-
def test_cross_validate_failing_fits_warnings(error_score):
2149+
def test_cross_validate_some_failing_fits_warning(error_score):
21352150
# Create a failing classifier to deliberately fail
2136-
failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
2151+
failing_clf = DataDependentFailingClassifier(max_x_value=8)
21372152
# dummy X data
21382153
X = np.arange(1, 10)
21392154
y = np.ones(9)
2140-
# fit_and_score_args = [failing_clf, X, None, dict(), None, None, 0, None, None]
21412155
# passing error score to trigger the warning message
21422156
cross_validate_args = [failing_clf, X, y]
2143-
cross_validate_kwargs = {"cv": 7, "error_score": error_score}
2157+
cross_validate_kwargs = {"cv": 3, "error_score": error_score}
21442158
# check if the warning message type is as expected
2159+
2160+
individual_fit_error_message = (
2161+
"ValueError: Classifier fit failed with 1 values too high"
2162+
)
21452163
warning_message = re.compile(
2146-
"7 fits failed.+total of 7.+The score on these"
2164+
"2 fits failed.+total of 3.+The score on these"
21472165
" train-test partitions for these parameters will be set to"
2148-
f" {cross_validate_kwargs['error_score']}.",
2166+
f" {cross_validate_kwargs['error_score']}.+{individual_fit_error_message}",
21492167
flags=re.DOTALL,
21502168
)
21512169

21522170
with pytest.warns(FitFailedWarning, match=warning_message):
21532171
cross_validate(*cross_validate_args, **cross_validate_kwargs)
21542172

2155-
# since we're using FailingClassfier, our error will be the following
2156-
error_message = "ValueError: Failing classifier failed as required"
21572173

2158-
# check traceback is included
2159-
warning_message = re.compile(
2160-
"The score on these train-test partitions for these parameters will be set"
2161-
f" to {cross_validate_kwargs['error_score']}.+{error_message}",
2162-
re.DOTALL,
2174+
@pytest.mark.parametrize("error_score", [np.nan, 0])
2175+
def test_cross_validate_all_failing_fits_error(error_score):
2176+
# Create a failing classifier to deliberately fail
2177+
failing_clf = FailingClassifier(FailingClassifier.FAILING_PARAMETER)
2178+
# dummy X data
2179+
X = np.arange(1, 10)
2180+
y = np.ones(9)
2181+
2182+
cross_validate_args = [failing_clf, X, y]
2183+
cross_validate_kwargs = {"cv": 7, "error_score": error_score}
2184+
2185+
individual_fit_error_message = "ValueError: Failing classifier failed as required"
2186+
error_message = re.compile(
2187+
"All the 7 fits failed.+your model is misconfigured.+"
2188+
f"{individual_fit_error_message}",
2189+
flags=re.DOTALL,
21632190
)
2164-
with pytest.warns(FitFailedWarning, match=warning_message):
2191+
2192+
with pytest.raises(ValueError, match=error_message):
21652193
cross_validate(*cross_validate_args, **cross_validate_kwargs)
21662194

21672195

0 commit comments

Comments
 (0)