From 9a624fcc73a25f83f41c780de1987d06effef86f Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 2 Aug 2021 16:05:35 +0200 Subject: [PATCH 01/17] ENH check_is_fitted calls __is_fitted__ if available --- sklearn/pipeline.py | 17 ++++++++++ .../preprocessing/_function_transformer.py | 4 +++ sklearn/utils/estimator_checks.py | 33 +++++++++++++++++++ sklearn/utils/tests/test_validation.py | 16 ++++++++- sklearn/utils/validation.py | 12 ++++--- 5 files changed, 77 insertions(+), 5 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 0814632721ba4..d679e2df9694f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -26,7 +26,9 @@ from .utils.deprecation import deprecated from .utils._tags import _safe_tags from .utils.validation import check_memory +from .utils.validation import check_is_fitted from .utils.fixes import delayed +from .exceptions import NotFittedError from .utils.metaestimators import _BaseComposition @@ -657,6 +659,21 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ + def __is_fitted__(self): + """Check if pipeline is fitted. + + Returns + ------- + bool + True if the last step is fitted, false otherwise. + """ + try: + # check if the last step of the pipeline is fitted + check_is_fitted(self.steps[-1][1]) + return True + except NotFittedError: + return False + def _sk_visual_block_(self): _, estimators = zip(*self.steps) diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 345cc96bb1c2e..ad5e34c38b683 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -176,5 +176,9 @@ def _transform(self, X, func=None, kw_args=None): return func(X, **(kw_args if kw_args else {})) + def __is_fitted__(self): + """Return True since FunctionTransfomer is stateless.""" + return True + def _more_tags(self): return {"no_validation": not self.validate, "stateless": True} diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index ff853be22f663..5fa8e1e5f82cd 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -50,6 +50,7 @@ from ..model_selection import ShuffleSplit from ..model_selection._validation import _safe_split from ..metrics.pairwise import rbf_kernel, linear_kernel, pairwise_distances +from ..utils.validation import check_is_fitted from . import shuffle from ._tags import ( @@ -301,6 +302,7 @@ def _yield_all_checks(estimator): yield check_dict_unchanged yield check_dont_overwrite_parameters yield check_fit_idempotent + yield check_fit_check_is_fitted if not tags["no_validation"]: yield check_n_features_in yield check_fit1d @@ -3314,6 +3316,37 @@ def check_fit_idempotent(name, estimator_orig): ) +def check_fit_check_is_fitted(name, estimator_orig): + # Make sure that n_features_in_ attribute doesn't exist until fit is + # called, and that its value is correct. + + rng = np.random.RandomState(42) + + estimator = clone(estimator_orig) + set_random_state(estimator) + if "warm_start" in estimator.get_params(): + estimator.set_params(warm_start=False) + + n_samples = 100 + X = rng.normal(loc=100, size=(n_samples, 2)) + X = _pairwise_estimator_convert_X(X, estimator) + if is_regressor(estimator_orig): + y = rng.normal(size=n_samples) + else: + y = rng.randint(low=0, high=2, size=n_samples) + y = _enforce_estimator_tags_y(estimator, y) + + if not _safe_tags(estimator).get("stateless", False): + # stateless estimators (such as FunctionTransformer) are always "fit"! + try: + check_is_fitted(estimator) + raise (f"{estimator.__name__} passes check_is_fitted before being fit!") + except NotFittedError: + pass + estimator.fit(X, y) + check_is_fitted(estimator) + + def check_n_features_in(name, estimator_orig): # Make sure that n_features_in_ attribute doesn't exist until fit is # called, and that its value is correct. diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 1a1449ecc209f..e0d272485db03 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -51,7 +51,7 @@ FLOAT_DTYPES, ) from sklearn.utils.validation import _check_fit_params - +from sklearn.base import BaseEstimator import sklearn from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning @@ -750,6 +750,20 @@ def test_check_symmetric(): assert_array_equal(output, arr_sym) +def test_check_is_fitted_with_is_fitted(): + class Estimator(BaseEstimator): + def fit(self, **kwargs): + self._is_fitted = True + return self + + def __is_fitted__(self): + return hasattr(self, "_is_fitted") and self._is_fitted + + with pytest.raises(NotFittedError): + check_is_fitted(Estimator()) + check_is_fitted(Estimator().fit()) + + def test_check_is_fitted(): # Check is TypeError raised when non estimator instance passed with pytest.raises(TypeError): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 98bf6ac8bdb6a..fbfaba9567d80 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1142,8 +1142,9 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): fitted attributes (ending with a trailing underscore) and otherwise raises a NotFittedError with the given message. - This utility is meant to be used internally by estimators themselves, - typically in their own predict / transform methods. + If an estimator does not set any attributes with a trailing underscore, it + can define a ``__is_fitted__`` method returning a boolean to specify if the + estimator is fitted or not. Parameters ---------- @@ -1194,13 +1195,16 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): if attributes is not None: if not isinstance(attributes, (list, tuple)): attributes = [attributes] - attrs = all_or_any([hasattr(estimator, attr) for attr in attributes]) + fitted = all_or_any([hasattr(estimator, attr) for attr in attributes]) + elif hasattr(estimator, "__is_fitted__"): + fitted = estimator.__is_fitted__() else: attrs = [ v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") ] + fitted = len(attrs) > 0 - if not attrs: + if not fitted: raise NotFittedError(msg % {"name": type(estimator).__name__}) From 54e5efd7f66b25aa9dbde4f049e56230b0f14497 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 2 Aug 2021 16:10:27 +0200 Subject: [PATCH 02/17] fix description --- sklearn/utils/estimator_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5fa8e1e5f82cd..d47e2166f301e 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3317,8 +3317,8 @@ def check_fit_idempotent(name, estimator_orig): def check_fit_check_is_fitted(name, estimator_orig): - # Make sure that n_features_in_ attribute doesn't exist until fit is - # called, and that its value is correct. + # Make sure that estimator doesn't pass check_is_fitted before calling fit + # and that passes check_is_fitted once it's fit. rng = np.random.RandomState(42) From 5076b691a7e59067370535591e587ddcac6adb99 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 2 Aug 2021 16:11:32 +0200 Subject: [PATCH 03/17] simplify validation --- sklearn/utils/validation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index fbfaba9567d80..b06604f01303e 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1199,10 +1199,9 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): elif hasattr(estimator, "__is_fitted__"): fitted = estimator.__is_fitted__() else: - attrs = [ + fitted = [ v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") ] - fitted = len(attrs) > 0 if not fitted: raise NotFittedError(msg % {"name": type(estimator).__name__}) From 544ec0f98c53c339c2e6b1c20bed267d4d487638 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 2 Aug 2021 16:18:56 +0200 Subject: [PATCH 04/17] add whats_new --- doc/whats_new/v1.0.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 7f44c62eb7329..837528ca69486 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -701,6 +701,12 @@ Changelog unavailable on the basis of state, in a more readable way. :pr:`19948` by `Joel Nothman`_. +_ |Enhancement| :func:`utils.validation.check_is_fitted` now uses + ``__is_fitted__``if available, instead of checking for attributes ending with + an underscore. This also makes :class:`Pipeline` and + :class:`preprocessing.FunctionTransformer` pass `check_is_fitted(estimator)`. + :pr:20657 by `Adrin Jalali`_. + - |Fix| Fixed a bug in :func:`utils.sparsefuncs.mean_variance_axis` where the precision of the computed variance was very poor when the real variance is exactly zero. :pr:`19766` by :user:`Jérémie du Boisberranger `. From 17853da80923354c2571387ae70b7ae0da96d0b0 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 2 Aug 2021 16:21:26 +0200 Subject: [PATCH 05/17] fix changelog format --- doc/whats_new/v1.0.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 837528ca69486..5c5ff4a41b062 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -702,10 +702,10 @@ Changelog :pr:`19948` by `Joel Nothman`_. _ |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__is_fitted__``if available, instead of checking for attributes ending with + ``__is_fitted__`` if available, instead of checking for attributes ending with an underscore. This also makes :class:`Pipeline` and - :class:`preprocessing.FunctionTransformer` pass `check_is_fitted(estimator)`. - :pr:20657 by `Adrin Jalali`_. + :class:`preprocessing.FunctionTransformer` pass + ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. - |Fix| Fixed a bug in :func:`utils.sparsefuncs.mean_variance_axis` where the precision of the computed variance was very poor when the real variance is From a93426fbb99feac2045c9da93820508d05558b17 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Mon, 2 Aug 2021 17:32:26 +0200 Subject: [PATCH 06/17] test pipeline and check_estimator --- sklearn/tests/test_pipeline.py | 11 +++++++- sklearn/utils/estimator_checks.py | 5 +++- sklearn/utils/tests/test_estimator_checks.py | 27 ++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 4176e1a65f4b2..1ee058e0da9a1 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -21,7 +21,8 @@ MinimalRegressor, MinimalTransformer, ) - +from sklearn.exceptions import NotFittedError +from sklearn.utils.validation import check_is_fitted from sklearn.base import clone, is_classifier, BaseEstimator, TransformerMixin from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union from sklearn.svm import SVC @@ -1361,3 +1362,11 @@ def test_search_cv_using_minimal_compatible_estimator(Predictor): else: assert_allclose(y_pred, y.mean()) assert model.score(X, y) == pytest.approx(r2_score(y, y_pred)) + + +def test_test_pipeline_check_if_fitted(): + pipeline = Pipeline([("clf", SVC())]) + with pytest.raises(NotFittedError): + check_is_fitted(pipeline) + pipeline.fit(iris.data, iris.target) + check_is_fitted(pipeline) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index d47e2166f301e..2a0e7a5bdd349 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3340,7 +3340,10 @@ def check_fit_check_is_fitted(name, estimator_orig): # stateless estimators (such as FunctionTransformer) are always "fit"! try: check_is_fitted(estimator) - raise (f"{estimator.__name__} passes check_is_fitted before being fit!") + raise Exception( + f"{estimator.__class__.__name__} passes check_is_fitted before being" + " fit!" + ) except NotFittedError: pass estimator.fit(X, y) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 8ff8d8cf5e782..a24dc40e337e2 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -20,11 +20,13 @@ MinimalTransformer, SkipTest, ) +from sklearn.utils.metaestimators import available_if from sklearn.utils.estimator_checks import check_estimator, _NotAnArray from sklearn.utils.estimator_checks import check_class_weight_balanced_linear_classifier from sklearn.utils.estimator_checks import set_random_state from sklearn.utils.estimator_checks import _set_checking_parameters from sklearn.utils.estimator_checks import check_estimators_unfitted +from sklearn.utils.estimator_checks import check_fit_check_is_fitted from sklearn.utils.estimator_checks import check_fit_score_takes_y from sklearn.utils.estimator_checks import check_no_attributes_set_in_init from sklearn.utils.estimator_checks import check_classifier_data_not_an_array @@ -748,3 +750,28 @@ def test_minimal_class_implementation_checks(): minimal_estimators = [MinimalTransformer(), MinimalRegressor(), MinimalClassifier()] for estimator in minimal_estimators: check_estimator(estimator) + + +def test_check_fit_check_is_fitted(): + class Estimator(BaseEstimator): + def __init__(self, behavior="attribute"): + self.behavior = behavior + + def fit(self, X, y, **kwargs): + if self.behavior == "attribute": + self.is_fitted_ = True + elif self.behavior == "method": + self._is_fitted = True + return self + + @available_if(lambda self: self.behavior in {"method", "always-true"}) + def __is_fitted__(self): + if self.behavior == "always-true": + return True + return hasattr(self, "_is_fitted") + + with raises(Exception, match="passes check_is_fitted before being fit"): + check_fit_check_is_fitted("estimator", Estimator(behavior="always-true")) + + check_fit_check_is_fitted("estimator", Estimator(behavior="method")) + check_fit_check_is_fitted("estimator", Estimator(behavior="attribute")) From 64c6a964a0d7568e9c3ba14e01c40494c5646a85 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 3 Aug 2021 11:59:05 +0200 Subject: [PATCH 07/17] rename to __sk_is_fitted__ --- doc/whats_new/v1.0.rst | 2 +- sklearn/pipeline.py | 2 +- sklearn/preprocessing/_function_transformer.py | 2 +- sklearn/utils/tests/test_estimator_checks.py | 2 +- sklearn/utils/tests/test_validation.py | 2 +- sklearn/utils/validation.py | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 5c5ff4a41b062..bd7fc3511941a 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -702,7 +702,7 @@ Changelog :pr:`19948` by `Joel Nothman`_. _ |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__is_fitted__`` if available, instead of checking for attributes ending with + ``__sk_is_fitted__`` if available, instead of checking for attributes ending with an underscore. This also makes :class:`Pipeline` and :class:`preprocessing.FunctionTransformer` pass ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index d679e2df9694f..4a21d1e27fb07 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -659,7 +659,7 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ - def __is_fitted__(self): + def __sk_is_fitted__(self): """Check if pipeline is fitted. Returns diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index ad5e34c38b683..57fdd0a3684f1 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -176,7 +176,7 @@ def _transform(self, X, func=None, kw_args=None): return func(X, **(kw_args if kw_args else {})) - def __is_fitted__(self): + def __sk_is_fitted__(self): """Return True since FunctionTransfomer is stateless.""" return True diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index a24dc40e337e2..22575369aec1d 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -765,7 +765,7 @@ def fit(self, X, y, **kwargs): return self @available_if(lambda self: self.behavior in {"method", "always-true"}) - def __is_fitted__(self): + def __sk_is_fitted__(self): if self.behavior == "always-true": return True return hasattr(self, "_is_fitted") diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index e0d272485db03..3d5d3f19b4ffc 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -756,7 +756,7 @@ def fit(self, **kwargs): self._is_fitted = True return self - def __is_fitted__(self): + def __sk_is_fitted__(self): return hasattr(self, "_is_fitted") and self._is_fitted with pytest.raises(NotFittedError): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index b06604f01303e..02767663bb803 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1143,7 +1143,7 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): raises a NotFittedError with the given message. If an estimator does not set any attributes with a trailing underscore, it - can define a ``__is_fitted__`` method returning a boolean to specify if the + can define a ``__sk_is_fitted__`` method returning a boolean to specify if the estimator is fitted or not. Parameters @@ -1196,8 +1196,8 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): if not isinstance(attributes, (list, tuple)): attributes = [attributes] fitted = all_or_any([hasattr(estimator, attr) for attr in attributes]) - elif hasattr(estimator, "__is_fitted__"): - fitted = estimator.__is_fitted__() + elif hasattr(estimator, "__sk_is_fitted__"): + fitted = estimator.__sk_is_fitted__() else: fitted = [ v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") From 552fec16f56df89b594c6cc5adbf373d1670533b Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 3 Aug 2021 12:01:59 +0200 Subject: [PATCH 08/17] use LogisticRegression in pipeline instead --- sklearn/tests/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 1ee058e0da9a1..3986495b5d13c 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1364,8 +1364,8 @@ def test_search_cv_using_minimal_compatible_estimator(Predictor): assert model.score(X, y) == pytest.approx(r2_score(y, y_pred)) -def test_test_pipeline_check_if_fitted(): - pipeline = Pipeline([("clf", SVC())]) +def test_pipeline_check_if_fitted(): + pipeline = Pipeline([("clf", LogisticRegression())]) with pytest.raises(NotFittedError): check_is_fitted(pipeline) pipeline.fit(iris.data, iris.target) From f59e95e5242bc1f54fa2e7df7370c7e94fb31577 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 4 Aug 2021 12:49:14 +0200 Subject: [PATCH 09/17] try a fake estimator --- sklearn/tests/test_pipeline.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 3986495b5d13c..4ec5c7f081a15 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1365,7 +1365,12 @@ def test_search_cv_using_minimal_compatible_estimator(Predictor): def test_pipeline_check_if_fitted(): - pipeline = Pipeline([("clf", LogisticRegression())]) + class Estimator(BaseEstimator): + def fit(self, X, y): + self.fitted_ = True + return self + + pipeline = Pipeline([("clf", Estimator())]) with pytest.raises(NotFittedError): check_is_fitted(pipeline) pipeline.fit(iris.data, iris.target) From 282e23b3805213fef22df25c88616da34ca9af47 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 6 Aug 2021 14:16:53 +0200 Subject: [PATCH 10/17] change from method to property --- doc/whats_new/v1.0.rst | 4 +-- sklearn/pipeline.py | 1 + .../preprocessing/_function_transformer.py | 1 + sklearn/utils/tests/test_estimator_checks.py | 25 +++++++++++-------- sklearn/utils/tests/test_validation.py | 1 + sklearn/utils/validation.py | 6 ++--- 6 files changed, 22 insertions(+), 16 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index bd7fc3511941a..66f6e8642f941 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -702,8 +702,8 @@ Changelog :pr:`19948` by `Joel Nothman`_. _ |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__sk_is_fitted__`` if available, instead of checking for attributes ending with - an underscore. This also makes :class:`Pipeline` and + ``__sk_is_fitted__`` property if available, instead of checking for attributes + ending with an underscore. This also makes :class:`Pipeline` and :class:`preprocessing.FunctionTransformer` pass ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 4a21d1e27fb07..bb0d8a07d8eff 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -659,6 +659,7 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ + @property def __sk_is_fitted__(self): """Check if pipeline is fitted. diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 57fdd0a3684f1..a043a01a54948 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -176,6 +176,7 @@ def _transform(self, X, func=None, kw_args=None): return func(X, **(kw_args if kw_args else {})) + @property def __sk_is_fitted__(self): """Return True since FunctionTransfomer is stateless.""" return True diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 22575369aec1d..5c6d34645a8c6 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -20,7 +20,6 @@ MinimalTransformer, SkipTest, ) -from sklearn.utils.metaestimators import available_if from sklearn.utils.estimator_checks import check_estimator, _NotAnArray from sklearn.utils.estimator_checks import check_class_weight_balanced_linear_classifier from sklearn.utils.estimator_checks import set_random_state @@ -754,24 +753,28 @@ def test_minimal_class_implementation_checks(): def test_check_fit_check_is_fitted(): class Estimator(BaseEstimator): - def __init__(self, behavior="attribute"): + def fit(self, X, y, **kwargs): + self.is_fitted_ = True + return self + + class EstimatorProperty(BaseEstimator): + def __init__(self, behavior="always-true"): self.behavior = behavior - def fit(self, X, y, **kwargs): - if self.behavior == "attribute": - self.is_fitted_ = True - elif self.behavior == "method": - self._is_fitted = True + def fit(self, X, y): + self._is_fitted = True return self - @available_if(lambda self: self.behavior in {"method", "always-true"}) + @property def __sk_is_fitted__(self): if self.behavior == "always-true": return True return hasattr(self, "_is_fitted") with raises(Exception, match="passes check_is_fitted before being fit"): - check_fit_check_is_fitted("estimator", Estimator(behavior="always-true")) + check_fit_check_is_fitted( + "estimator", EstimatorProperty(behavior="always-true") + ) - check_fit_check_is_fitted("estimator", Estimator(behavior="method")) - check_fit_check_is_fitted("estimator", Estimator(behavior="attribute")) + check_fit_check_is_fitted("estimator", EstimatorProperty(behavior="method")) + check_fit_check_is_fitted("estimator", Estimator()) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 3d5d3f19b4ffc..243238db4391c 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -756,6 +756,7 @@ def fit(self, **kwargs): self._is_fitted = True return self + @property def __sk_is_fitted__(self): return hasattr(self, "_is_fitted") and self._is_fitted diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 02767663bb803..784f053174307 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1143,8 +1143,8 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): raises a NotFittedError with the given message. If an estimator does not set any attributes with a trailing underscore, it - can define a ``__sk_is_fitted__`` method returning a boolean to specify if the - estimator is fitted or not. + can define a ``__sk_is_fitted__`` property returning a boolean to specify + if the estimator is fitted or not. Parameters ---------- @@ -1197,7 +1197,7 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): attributes = [attributes] fitted = all_or_any([hasattr(estimator, attr) for attr in attributes]) elif hasattr(estimator, "__sk_is_fitted__"): - fitted = estimator.__sk_is_fitted__() + fitted = estimator.__sk_is_fitted__ else: fitted = [ v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") From bb72f97505792472f873e6087d54662b58bd043c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 6 Aug 2021 15:31:16 +0200 Subject: [PATCH 11/17] address Guillaume's comments --- sklearn/pipeline.py | 8 +------- sklearn/utils/estimator_checks.py | 7 ++++++- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index bb0d8a07d8eff..c7cae2e476f81 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -661,13 +661,7 @@ def n_features_in_(self): @property def __sk_is_fitted__(self): - """Check if pipeline is fitted. - - Returns - ------- - bool - True if the last step is fitted, false otherwise. - """ + """Indicate whether pipeline has been fit.""" try: # check if the last step of the pipeline is fitted check_is_fitted(self.steps[-1][1]) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 6f5c24efae88c..f7a4ec0146ea7 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3526,7 +3526,12 @@ def check_fit_check_is_fitted(name, estimator_orig): except NotFittedError: pass estimator.fit(X, y) - check_is_fitted(estimator) + try: + check_is_fitted(estimator) + except NotFittedError as e: + raise NotFittedError( + "Estimator fails to pass `check_is_fitted` even though it has been fit." + ) from e def check_n_features_in(name, estimator_orig): From 9552776c0953561cc3cda7d12d318f6bd269f531 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 6 Aug 2021 16:04:31 +0200 Subject: [PATCH 12/17] FunctionTransformer uses a class attribute --- sklearn/preprocessing/_function_transformer.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index a043a01a54948..8d48a6af2fa44 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -82,6 +82,10 @@ class FunctionTransformer(TransformerMixin, BaseEstimator): [1.0986..., 1.3862...]]) """ + # This is a stateless transfomer, and always passes + # ``check_is_fitted(transformer)`` + __sk_is_fitted__ = True + def __init__( self, func=None, @@ -176,10 +180,5 @@ def _transform(self, X, func=None, kw_args=None): return func(X, **(kw_args if kw_args else {})) - @property - def __sk_is_fitted__(self): - """Return True since FunctionTransfomer is stateless.""" - return True - def _more_tags(self): return {"no_validation": not self.validate, "stateless": True} From c9384bf28ff6f2585232c8f9e2f44dca50c7e287 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 12 Aug 2021 15:16:08 +0200 Subject: [PATCH 13/17] Revert "FunctionTransformer uses a class attribute" This reverts commit 9552776c0953561cc3cda7d12d318f6bd269f531. --- sklearn/preprocessing/_function_transformer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 8d48a6af2fa44..a043a01a54948 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -82,10 +82,6 @@ class FunctionTransformer(TransformerMixin, BaseEstimator): [1.0986..., 1.3862...]]) """ - # This is a stateless transfomer, and always passes - # ``check_is_fitted(transformer)`` - __sk_is_fitted__ = True - def __init__( self, func=None, @@ -180,5 +176,10 @@ def _transform(self, X, func=None, kw_args=None): return func(X, **(kw_args if kw_args else {})) + @property + def __sk_is_fitted__(self): + """Return True since FunctionTransfomer is stateless.""" + return True + def _more_tags(self): return {"no_validation": not self.validate, "stateless": True} From c0a84dde392c4461efe1de5706dc9c5dc4db875e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 12 Aug 2021 15:19:12 +0200 Subject: [PATCH 14/17] Revert "change from method to property" This reverts commit 282e23b3805213fef22df25c88616da34ca9af47. --- doc/whats_new/v1.0.rst | 4 +-- sklearn/pipeline.py | 1 - .../preprocessing/_function_transformer.py | 1 - sklearn/utils/tests/test_estimator_checks.py | 25 ++++++++----------- sklearn/utils/tests/test_validation.py | 1 - sklearn/utils/validation.py | 6 ++--- 6 files changed, 16 insertions(+), 22 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index ab4d9b266fad2..2adac810be246 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -724,8 +724,8 @@ Changelog :pr:`19948` by `Joel Nothman`_. _ |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__sk_is_fitted__`` property if available, instead of checking for attributes - ending with an underscore. This also makes :class:`Pipeline` and + ``__sk_is_fitted__`` if available, instead of checking for attributes ending with + an underscore. This also makes :class:`Pipeline` and :class:`preprocessing.FunctionTransformer` pass ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index c7cae2e476f81..ec4992d321a9c 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -659,7 +659,6 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ - @property def __sk_is_fitted__(self): """Indicate whether pipeline has been fit.""" try: diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index a043a01a54948..57fdd0a3684f1 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -176,7 +176,6 @@ def _transform(self, X, func=None, kw_args=None): return func(X, **(kw_args if kw_args else {})) - @property def __sk_is_fitted__(self): """Return True since FunctionTransfomer is stateless.""" return True diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index e7f32bdc746ca..a7cd9a3d0498f 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -34,6 +34,7 @@ from sklearn.utils.validation import check_array from sklearn.utils import all_estimators from sklearn.exceptions import SkipTestWarning +from sklearn.metaestimators import available_if from sklearn.utils.estimator_checks import ( _NotAnArray, @@ -991,28 +992,24 @@ def test_minimal_class_implementation_checks(): def test_check_fit_check_is_fitted(): class Estimator(BaseEstimator): - def fit(self, X, y, **kwargs): - self.is_fitted_ = True - return self - - class EstimatorProperty(BaseEstimator): - def __init__(self, behavior="always-true"): + def __init__(self, behavior="attribute"): self.behavior = behavior - def fit(self, X, y): - self._is_fitted = True + def fit(self, X, y, **kwargs): + if self.behavior == "attribute": + self.is_fitted_ = True + elif self.behavior == "method": + self._is_fitted = True return self - @property + @available_if(lambda self: self.behavior in {"method", "always-true"}) def __sk_is_fitted__(self): if self.behavior == "always-true": return True return hasattr(self, "_is_fitted") with raises(Exception, match="passes check_is_fitted before being fit"): - check_fit_check_is_fitted( - "estimator", EstimatorProperty(behavior="always-true") - ) + check_fit_check_is_fitted("estimator", Estimator(behavior="always-true")) - check_fit_check_is_fitted("estimator", EstimatorProperty(behavior="method")) - check_fit_check_is_fitted("estimator", Estimator()) + check_fit_check_is_fitted("estimator", Estimator(behavior="method")) + check_fit_check_is_fitted("estimator", Estimator(behavior="attribute")) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 243238db4391c..3d5d3f19b4ffc 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -756,7 +756,6 @@ def fit(self, **kwargs): self._is_fitted = True return self - @property def __sk_is_fitted__(self): return hasattr(self, "_is_fitted") and self._is_fitted diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 784f053174307..02767663bb803 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1143,8 +1143,8 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): raises a NotFittedError with the given message. If an estimator does not set any attributes with a trailing underscore, it - can define a ``__sk_is_fitted__`` property returning a boolean to specify - if the estimator is fitted or not. + can define a ``__sk_is_fitted__`` method returning a boolean to specify if the + estimator is fitted or not. Parameters ---------- @@ -1197,7 +1197,7 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): attributes = [attributes] fitted = all_or_any([hasattr(estimator, attr) for attr in attributes]) elif hasattr(estimator, "__sk_is_fitted__"): - fitted = estimator.__sk_is_fitted__ + fitted = estimator.__sk_is_fitted__() else: fitted = [ v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") From d30ba5d991fe4dbe30508db6835b13da58bd4377 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 12 Aug 2021 15:22:56 +0200 Subject: [PATCH 15/17] Olivier's comments --- sklearn/pipeline.py | 3 +++ sklearn/utils/estimator_checks.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index ec4992d321a9c..236ffea43034f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -663,6 +663,9 @@ def __sk_is_fitted__(self): """Indicate whether pipeline has been fit.""" try: # check if the last step of the pipeline is fitted + # we only check the last step since if the last step is fit, it + # means the previous steps should also be fit. This is faster than + # checking if every step of the pipeline is fit. check_is_fitted(self.steps[-1][1]) return True except NotFittedError: diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index f7a4ec0146ea7..612c61753a1cb 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3519,7 +3519,7 @@ def check_fit_check_is_fitted(name, estimator_orig): # stateless estimators (such as FunctionTransformer) are always "fit"! try: check_is_fitted(estimator) - raise Exception( + raise AssertionError( f"{estimator.__class__.__name__} passes check_is_fitted before being" " fit!" ) From 89d41cb0cb0b5b634bb607423570215df581a0d7 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 12 Aug 2021 15:23:49 +0200 Subject: [PATCH 16/17] rename to __sklearn_is_fitted__ --- doc/whats_new/v1.0.rst | 2 +- sklearn/pipeline.py | 2 +- sklearn/preprocessing/_function_transformer.py | 2 +- sklearn/utils/tests/test_estimator_checks.py | 2 +- sklearn/utils/tests/test_validation.py | 2 +- sklearn/utils/validation.py | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 2adac810be246..5f50465f5aef3 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -724,7 +724,7 @@ Changelog :pr:`19948` by `Joel Nothman`_. _ |Enhancement| :func:`utils.validation.check_is_fitted` now uses - ``__sk_is_fitted__`` if available, instead of checking for attributes ending with + ``__sklearn_is_fitted__`` if available, instead of checking for attributes ending with an underscore. This also makes :class:`Pipeline` and :class:`preprocessing.FunctionTransformer` pass ``check_is_fitted(estimator)``. :pr:`20657` by `Adrin Jalali`_. diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 236ffea43034f..35f0fa3768b45 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -659,7 +659,7 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ - def __sk_is_fitted__(self): + def __sklearn_is_fitted__(self): """Indicate whether pipeline has been fit.""" try: # check if the last step of the pipeline is fitted diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index 57fdd0a3684f1..202b6ec2f6cdd 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -176,7 +176,7 @@ def _transform(self, X, func=None, kw_args=None): return func(X, **(kw_args if kw_args else {})) - def __sk_is_fitted__(self): + def __sklearn_is_fitted__(self): """Return True since FunctionTransfomer is stateless.""" return True diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index a7cd9a3d0498f..91219b8b43f08 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1003,7 +1003,7 @@ def fit(self, X, y, **kwargs): return self @available_if(lambda self: self.behavior in {"method", "always-true"}) - def __sk_is_fitted__(self): + def __sklearn_is_fitted__(self): if self.behavior == "always-true": return True return hasattr(self, "_is_fitted") diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 3d5d3f19b4ffc..35afff12b7ca4 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -756,7 +756,7 @@ def fit(self, **kwargs): self._is_fitted = True return self - def __sk_is_fitted__(self): + def __sklearn_is_fitted__(self): return hasattr(self, "_is_fitted") and self._is_fitted with pytest.raises(NotFittedError): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 02767663bb803..aa7a23ffcdf9c 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1143,7 +1143,7 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): raises a NotFittedError with the given message. If an estimator does not set any attributes with a trailing underscore, it - can define a ``__sk_is_fitted__`` method returning a boolean to specify if the + can define a ``__sklearn_is_fitted__`` method returning a boolean to specify if the estimator is fitted or not. Parameters @@ -1196,8 +1196,8 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): if not isinstance(attributes, (list, tuple)): attributes = [attributes] fitted = all_or_any([hasattr(estimator, attr) for attr in attributes]) - elif hasattr(estimator, "__sk_is_fitted__"): - fitted = estimator.__sk_is_fitted__() + elif hasattr(estimator, "__sklearn_is_fitted__"): + fitted = estimator.__sklearn_is_fitted__() else: fitted = [ v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") From 1b08447f963daa93fcbc89a1b601e2c98215144e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Thu, 12 Aug 2021 16:14:35 +0200 Subject: [PATCH 17/17] fix import --- sklearn/utils/tests/test_estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 91219b8b43f08..89e2e43651088 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -34,7 +34,7 @@ from sklearn.utils.validation import check_array from sklearn.utils import all_estimators from sklearn.exceptions import SkipTestWarning -from sklearn.metaestimators import available_if +from sklearn.utils.metaestimators import available_if from sklearn.utils.estimator_checks import ( _NotAnArray,