
[MRG] run check_estimator on meta-estimators #9741


Closed · wants to merge 19 commits
12 changes: 12 additions & 0 deletions sklearn/pipeline.py
@@ -1037,6 +1037,10 @@ def _more_tags(self):
# tuples and `fit` is not called yet to validate the steps.
pass

# hack to make common cases work:
# we assume the pipeline can handle NaN if all the steps can
tags["allow_nan"] = all(s[1]._get_tags()["allow_nan"] for s in self.steps)

return tags

def get_feature_names_out(self, input_features=None):
@@ -1817,6 +1821,14 @@ def _update_transformer_list(self, transformers):
for name, old in self.transformer_list
]

def _more_tags(self):
# The FeatureUnion can handle NaNs if all the steps can.
return {
"allow_nan": all(
s[1]._get_tags()["allow_nan"] for s in self.transformer_list
)
}

@property
def n_features_in_(self):
"""Number of features seen during :term:`fit`."""
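Below the pipeline.py diff, a minimal self-contained sketch (illustration only, not part of this PR) of the tag propagation the two hunks above implement, using the private _get_tags API of this branch: Pipeline and FeatureUnion report allow_nan=True only if every step / transformer does. The NaNOkTransf and NaNNotOkTransf helpers are hypothetical.

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import make_pipeline, make_union


class NaNOkTransf(BaseEstimator, TransformerMixin):
    # hypothetical transformer that declares it can handle NaN
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X

    def _more_tags(self):
        return {"allow_nan": True}


class NaNNotOkTransf(NaNOkTransf):
    # hypothetical transformer that declares it cannot handle NaN
    def _more_tags(self):
        return {"allow_nan": False}


# Pipeline: allow_nan is the AND over all steps.
assert make_pipeline(NaNOkTransf(), NaNOkTransf())._get_tags()["allow_nan"]
assert not make_pipeline(NaNOkTransf(), NaNNotOkTransf())._get_tags()["allow_nan"]

# FeatureUnion: the same rule over transformer_list.
assert not make_union(NaNOkTransf(), NaNNotOkTransf())._get_tags()["allow_nan"]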
39 changes: 37 additions & 2 deletions sklearn/tests/test_metaestimators.py
@@ -6,21 +6,24 @@
import pytest

from sklearn.base import BaseEstimator, is_regressor
from sklearn.cluster import KMeans
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import RFE, RFECV
from sklearn.feature_selection import RFE, RFECV, SelectFromModel
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline, make_union
from sklearn.preprocessing import MaxAbsScaler, StandardScaler
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.utils import all_estimators
from sklearn.utils._testing import set_random_state
from sklearn.utils.estimator_checks import (
_enforce_estimator_tags_X,
_enforce_estimator_tags_y,
parametrize_with_checks,
)
from sklearn.utils.validation import check_is_fitted

@@ -74,6 +77,38 @@ def __init__(
),
]

TESTED_META = [
Member: Why is this better here than in test_pipeline.py, test_from_model.py, etc.?

Member Author: I'm not sure it is. Having them in one place has pros and cons, I think. Right now it shows what kinds of cases and what interactions we're testing.

Member: +1 for putting them all in the same place (for now?)

Member Author: I have no strong opinion either way; I was about to move them to the respective files, mostly into the pipeline tests.

# pipelines
Pipeline((("ss", StandardScaler()),)),
Pipeline([("ss", StandardScaler())]),
make_pipeline(StandardScaler(), LogisticRegression()),
# union
make_union(StandardScaler()),
# union and pipeline
make_pipeline(make_union(PCA(), StandardScaler()), LogisticRegression()),
# pipeline with clustering
make_pipeline(KMeans(random_state=0)),
# SelectFromModel
make_pipeline(
SelectFromModel(LogisticRegression(), threshold=-np.inf), LogisticRegression()
),
# grid-search
GridSearchCV(LogisticRegression(), {"C": [0.1, 1]}, cv=2),
# will fail tragically
# make_pipeline(StandardScaler(), None)
Member: Is this failing because None or 'passthrough' does not support _get_tags?

Member Author: There were multiple issues.

]


@parametrize_with_checks(TESTED_META)
def test_metaestimators_check_estimator(estimator, check):
if check.func.__name__ in [
"check_estimators_overwrite_params",
"check_dont_overwrite_parameters",
] and (isinstance(estimator, Pipeline) or isinstance(estimator, FeatureUnion)):
# we don't clone in pipeline or feature union
return
check(estimator)


def test_metaestimator_delegation():
# Ensures specified metaestimators have methods iff subestimator does
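For context, the new test above follows the standard parametrize_with_checks pattern; a minimal self-contained sketch (not part of the diff, run under pytest):

from sklearn.linear_model import LogisticRegression
from sklearn.utils.estimator_checks import parametrize_with_checks


# parametrize_with_checks turns each (estimator, check) pair into its own
# pytest case; the test body simply runs the check against the estimator.
@parametrize_with_checks([LogisticRegression()])
def test_sklearn_compatible_estimator(estimator, check):
    check(estimator)

The skip condition in test_metaestimators_check_estimator reads check.func.__name__ because each generated check is a functools.partial wrapping the underlying check function.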
13 changes: 12 additions & 1 deletion sklearn/tests/test_pipeline.py
@@ -95,6 +95,12 @@ def inverse_transform(self, X):
return X


class FitTransf(NoTrans):
# has fit_transform but not transform
def fit_transform(self, X, y=None):
return X


class TransfFitParams(Transf):
def fit(self, X, y, **fit_params):
self.fit_params = fit_params
@@ -812,6 +818,7 @@ def test_pipeline_ducktyping():
pipeline.predict
pipeline.transform
pipeline.inverse_transform
pipeline.fit_transform

pipeline = make_pipeline(Transf())
assert not hasattr(pipeline, "predict")
@@ -823,6 +830,7 @@
assert not hasattr(pipeline, "predict")
pipeline.transform
pipeline.inverse_transform
pipeline.fit_transform

pipeline = make_pipeline(Transf(), NoInvTransf())
assert not hasattr(pipeline, "predict")
@@ -834,6 +842,10 @@
pipeline.transform
assert not hasattr(pipeline, "inverse_transform")

pipeline = make_pipeline(FitTransf())
assert not hasattr(pipeline, "transform")
pipeline.fit_transform


def test_make_pipeline():
t1 = Transf()
@@ -1260,7 +1272,6 @@ def test_step_name_validation():
est.set_params(**{param: bad_steps})
with pytest.raises(ValueError, match=message):
est.fit([[1]], [1])

with pytest.raises(ValueError, match=message):
est.fit_transform([[1]], [1])

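Finally, a minimal self-contained sketch (illustration only) of the duck-typing behaviour the new FitTransf case asserts: a pipeline whose final step implements fit_transform but not transform exposes fit_transform while hasattr(pipeline, "transform") is False. FitTransformOnly is a hypothetical stand-in for the FitTransf helper above.

from sklearn.base import BaseEstimator
from sklearn.pipeline import make_pipeline


class FitTransformOnly(BaseEstimator):
    # hypothetical final step: has fit_transform but no transform
    def fit(self, X, y=None):
        return self

    def fit_transform(self, X, y=None):
        return X


pipe = make_pipeline(FitTransformOnly())
assert not hasattr(pipe, "transform")  # the final step lacks transform
pipe.fit_transform  # but fit_transform is still exposed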