From cc8f605357dd80a8a8e062718b8651de29a1bc4c Mon Sep 17 00:00:00 2001 From: Tim Head Date: Tue, 2 Sep 2025 14:23:51 +0200 Subject: [PATCH] Make common tests strict This allows us to find common tests that were marked as xfail but have started passing. If a test outcome changes we either need to update our xfail list or it is a bug that needs addressing. --- sklearn/tests/test_common.py | 4 +- .../utils/_test_common/instance_generator.py | 84 ++++++------------- 2 files changed, 27 insertions(+), 61 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index a48ea5231560b..9b64ec2bb7729 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -112,7 +112,9 @@ def test_get_check_estimator_ids(val, expected): @parametrize_with_checks( - list(_tested_estimators()), expected_failed_checks=_get_expected_failed_checks + list(_tested_estimators()), + expected_failed_checks=_get_expected_failed_checks, + xfail_strict=True, ) def test_estimators(estimator, check, request): # Common tests for estimator instances diff --git a/sklearn/utils/_test_common/instance_generator.py b/sklearn/utils/_test_common/instance_generator.py index 355e35aa6308a..145ef56ca8ce4 100644 --- a/sklearn/utils/_test_common/instance_generator.py +++ b/sklearn/utils/_test_common/instance_generator.py @@ -145,12 +145,9 @@ RegressorChain, ) from sklearn.neighbors import ( - KernelDensity, KNeighborsClassifier, KNeighborsRegressor, - KNeighborsTransformer, NeighborhoodComponentsAnalysis, - RadiusNeighborsTransformer, ) from sklearn.neural_network import BernoulliRBM, MLPClassifier, MLPRegressor from sklearn.pipeline import FeatureUnion, Pipeline @@ -845,24 +842,6 @@ def _yield_instances_for_check(check, estimator_orig): PER_ESTIMATOR_XFAIL_CHECKS = { - AdaBoostClassifier: { - # TODO: replace by a statistical test, see meta-issue #16298 - "check_sample_weight_equivalence_on_dense_data": ( - "sample_weight is not equivalent to removing/repeating samples." 
- ), - "check_sample_weight_equivalence_on_sparse_data": ( - "sample_weight is not equivalent to removing/repeating samples." - ), - }, - AdaBoostRegressor: { - # TODO: replace by a statistical test, see meta-issue #16298 - "check_sample_weight_equivalence_on_dense_data": ( - "sample_weight is not equivalent to removing/repeating samples." - ), - "check_sample_weight_equivalence_on_sparse_data": ( - "sample_weight is not equivalent to removing/repeating samples." - ), - }, BaggingClassifier: { # TODO: replace by a statistical test, see meta-issue #16298 "check_sample_weight_equivalence_on_dense_data": ( @@ -914,7 +893,6 @@ def _yield_instances_for_check(check, estimator_orig): "check_dont_overwrite_parameters": "FIXME", }, FixedThresholdClassifier: { - "check_classifiers_train": "Threshold at probability 0.5 does not hold", "check_sample_weight_equivalence_on_dense_data": ( "Due to the cross-validation and sample ordering, removing a sample" " is not strictly equal to putting is weight to zero. 
Specific unit" @@ -950,21 +928,15 @@ def _yield_instances_for_check(check, estimator_orig): "check_fit2d_1sample": ( "Fail during parameter check since min/max resources requires more samples" ), - "check_estimators_nan_inf": "FIXME", - "check_classifiers_one_label_sample_weights": "FIXME", - "check_fit2d_1feature": "FIXME", - "check_supervised_y_2d": "DataConversionWarning not caught", "check_requires_y_none": "Doesn't fail gracefully", + "check_supervised_y_2d": "DataConversionWarning not caught", }, HalvingRandomSearchCV: { "check_fit2d_1sample": ( "Fail during parameter check since min/max resources requires more samples" ), - "check_estimators_nan_inf": "FIXME", - "check_classifiers_one_label_sample_weights": "FIXME", - "check_fit2d_1feature": "FIXME", - "check_supervised_y_2d": "DataConversionWarning not caught", "check_requires_y_none": "Doesn't fail gracefully", + "check_supervised_y_2d": "DataConversionWarning not caught", }, HistGradientBoostingClassifier: { # TODO: replace by a statistical test, see meta-issue #16298 @@ -993,11 +965,6 @@ def _yield_instances_for_check(check, estimator_orig): "sample_weight is not equivalent to removing/repeating samples." ), }, - KernelDensity: { - "check_sample_weight_equivalence_on_dense_data": ( - "sample_weight must have positive values" - ), - }, KMeans: { # TODO: replace by a statistical test, see meta-issue #16298 "check_sample_weight_equivalence_on_dense_data": ( @@ -1007,9 +974,6 @@ def _yield_instances_for_check(check, estimator_orig): "sample_weight is not equivalent to removing/repeating samples." ), }, - KNeighborsTransformer: { - "check_methods_sample_order_invariance": "check is not applicable." 
- }, LinearSVC: { # TODO: replace by a statistical test when _dual=True, see meta-issue #16298 "check_sample_weight_equivalence_on_dense_data": ( @@ -1018,9 +982,6 @@ def _yield_instances_for_check(check, estimator_orig): "check_sample_weight_equivalence_on_sparse_data": ( "sample_weight is not equivalent to removing/repeating samples." ), - "check_non_transformer_estimators_n_iter": ( - "n_iter_ cannot be easily accessed." - ), }, LinearSVR: { # TODO: replace by a statistical test, see meta-issue #16298 @@ -1031,15 +992,6 @@ def _yield_instances_for_check(check, estimator_orig): "sample_weight is not equivalent to removing/repeating samples." ), }, - LogisticRegression: { - # TODO: fix sample_weight handling of this estimator, see meta-issue #16298 - "check_sample_weight_equivalence_on_dense_data": ( - "sample_weight is not equivalent to removing/repeating samples." - ), - "check_sample_weight_equivalence_on_sparse_data": ( - "sample_weight is not equivalent to removing/repeating samples." - ), - }, MiniBatchKMeans: { # TODO: replace by a statistical test, see meta-issue #16298 "check_sample_weight_equivalence_on_dense_data": ( @@ -1106,9 +1058,6 @@ def _yield_instances_for_check(check, estimator_orig): "Therefore this test is x-fail until we fix this." ), }, - RadiusNeighborsTransformer: { - "check_methods_sample_order_invariance": "check is not applicable." 
- }, RandomForestClassifier: { # TODO: replace by a statistical test, see meta-issue #16298 "check_sample_weight_equivalence_on_dense_data": ( @@ -1128,8 +1077,8 @@ def _yield_instances_for_check(check, estimator_orig): ), }, RandomizedSearchCV: { - "check_supervised_y_2d": "DataConversionWarning not caught", "check_requires_y_none": "Doesn't fail gracefully", + "check_supervised_y_2d": "DataConversionWarning not caught", }, RandomTreesEmbedding: { # TODO: replace by a statistical test, see meta-issue #16298 @@ -1206,11 +1155,6 @@ def _yield_instances_for_check(check, estimator_orig): "check_estimators_dtypes": "raises nan error", "check_fit2d_1sample": "_scale_normalize fails", "check_fit2d_1feature": "raises apply_along_axis error", - "check_estimator_sparse_matrix": "does not fail gracefully", - "check_estimator_sparse_array": "does not fail gracefully", - "check_methods_subset_invariance": "empty array passed inside", - "check_dont_overwrite_parameters": "empty array passed inside", - "check_fit2d_predict1d": "empty array passed inside", }, SVC: { # TODO: fix sample_weight handling of this estimator when probability=False @@ -1254,7 +1198,9 @@ def _yield_instances_for_check(check, estimator_orig): def _get_expected_failed_checks(estimator): """Get the expected failed checks for all estimators in scikit-learn.""" - failed_checks = PER_ESTIMATOR_XFAIL_CHECKS.get(type(estimator), {}) + # Make a copy so that our modifications are not permanent. Important for + # estimators that are tested with multiple hyper-parameters. 
+ failed_checks = dict(PER_ESTIMATOR_XFAIL_CHECKS.get(type(estimator), {})) tags = get_tags(estimator) @@ -1287,4 +1233,22 @@ def _get_expected_failed_checks(estimator): } ) + if type(estimator) == DummyClassifier and estimator.strategy == "most_frequent": + failed_checks.pop("check_methods_sample_order_invariance") + failed_checks.pop("check_methods_subset_invariance") + + if type(estimator) in ( + GridSearchCV, + HalvingGridSearchCV, + HalvingRandomSearchCV, + RandomizedSearchCV, + ) and ( + type(estimator.estimator) == LogisticRegression + or ( + type(estimator.estimator) == Pipeline + and estimator.estimator.steps[1][0] == "logisticregression" + ) + ): + failed_checks.pop("check_supervised_y_2d") + return failed_checks