From 08d3d86e7b5da8415a091753cc9964eccec6d13e Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Wed, 25 Aug 2021 19:45:31 -0400
Subject: [PATCH 1/4] ENH Adds feature_names_in_ to kernel_approximation

---
 sklearn/kernel_approximation.py   | 30 ++++++++++++++++++++++++++++++
 sklearn/tests/test_common.py      |  1 -
 sklearn/utils/estimator_checks.py |  1 +
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py
index 08957191ba4b8..eb7930f4db319 100644
--- a/sklearn/kernel_approximation.py
+++ b/sklearn/kernel_approximation.py
@@ -83,6 +83,12 @@ class PolynomialCountSketch(BaseEstimator, TransformerMixin):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     See Also
     --------
     AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
@@ -256,6 +262,12 @@ class RBFSampler(TransformerMixin, BaseEstimator):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     See Also
     --------
     AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
@@ -386,6 +398,12 @@ class SkewedChi2Sampler(TransformerMixin, BaseEstimator):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     See Also
     --------
     AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
@@ -520,6 +538,12 @@ class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     See Also
     --------
     SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of
@@ -764,6 +788,12 @@ class Nystroem(TransformerMixin, BaseEstimator):
 
         .. versionadded:: 0.24
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
     See Also
     --------
     AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 008bdee7e646b..6c2d58266260c 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -327,7 +327,6 @@ def test_check_n_features_in_after_fitting(estimator):
     "compose",
     "ensemble",
     "feature_extraction",
-    "kernel_approximation",
     "model_selection",
     "multiclass",
     "multioutput",
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index cf88785807d06..7a51ffa0b98b7 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3722,6 +3722,7 @@ def check_dataframe_column_names_consistency(name, estimator_orig):
     set_random_state(estimator)
 
     X_orig = rng.normal(size=(150, 8))
+    X_orig -= X_orig.min() + 0.5
     X_orig = _enforce_estimator_tags_x(estimator, X_orig)
     X_orig = _pairwise_estimator_convert_X(X_orig, estimator)
     n_samples, n_features = X_orig.shape

From f5b4c6d45c9339cb4f5dc87892fdaa072d794057 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Wed, 25 Aug 2021 19:47:10 -0400
Subject: [PATCH 2/4] TST Adds check that feature_names_in_ is documented

---
 sklearn/utils/estimator_checks.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 7a51ffa0b98b7..007e94952ae49 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3744,6 +3744,17 @@ def check_dataframe_column_names_consistency(name, estimator_orig):
         )
     assert_array_equal(estimator.feature_names_in_, names)
 
+    # Only check sklearn estimators for feature_names_in_ in docstring
+    module_name = estimator_orig.__module__
+    if (
+        module_name.startswith("sklearn.")
+        and not ("test_" in module_name or module_name.endswith("_testing"))
+        and ("feature_names_in_" not in (estimator.__doc__))
+    ):
+        raise ValueError(  # pragma: nocover
+            f"Estimator {name} does not document its feature_names_in_ attribute"
+        )
+
     check_methods = []
     for method in (
         "predict",

From 4029a3ab2424528f1feb8669fd2e18ddada68e61 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Wed, 25 Aug 2021 19:52:36 -0400
Subject: [PATCH 3/4] REV Remove feature_names_in_ docstring check

---
 sklearn/utils/estimator_checks.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 007e94952ae49..7a51ffa0b98b7 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3744,17 +3744,6 @@ def check_dataframe_column_names_consistency(name, estimator_orig):
         )
     assert_array_equal(estimator.feature_names_in_, names)
 
-    # Only check sklearn estimators for feature_names_in_ in docstring
-    module_name = estimator_orig.__module__
-    if (
-        module_name.startswith("sklearn.")
-        and not ("test_" in module_name or module_name.endswith("_testing"))
-        and ("feature_names_in_" not in (estimator.__doc__))
-    ):
-        raise ValueError(  # pragma: nocover
-            f"Estimator {name} does not document its feature_names_in_ attribute"
-        )
-
     check_methods = []
     for method in (
         "predict",

From de0daeec38cd749968d0391f204158f1a09d6126 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 26 Aug 2021 17:19:35 +0200
Subject: [PATCH 4/4] Add comment

---
 sklearn/utils/estimator_checks.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 7a51ffa0b98b7..5d09b265ee6fc 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3722,6 +3722,8 @@ def check_dataframe_column_names_consistency(name, estimator_orig):
     set_random_state(estimator)
 
     X_orig = rng.normal(size=(150, 8))
+
+    # Shift so min(X) is -0.5: e.g. SkewedChi2Sampler rejects X <= -skewedness.
     X_orig -= X_orig.min() + 0.5
     X_orig = _enforce_estimator_tags_x(estimator, X_orig)
     X_orig = _pairwise_estimator_convert_X(X_orig, estimator)