From 08d3d86e7b5da8415a091753cc9964eccec6d13e Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 25 Aug 2021 19:45:31 -0400 Subject: [PATCH 1/4] ENH Adds feature_names_in_ to kernel_approximation --- sklearn/kernel_approximation.py | 30 ++++++++++++++++++++++++++++++ sklearn/tests/test_common.py | 1 - sklearn/utils/estimator_checks.py | 1 + 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 08957191ba4b8..eb7930f4db319 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -83,6 +83,12 @@ class PolynomialCountSketch(BaseEstimator, TransformerMixin): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + See Also -------- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. @@ -256,6 +262,12 @@ class RBFSampler(TransformerMixin, BaseEstimator): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + See Also -------- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. @@ -386,6 +398,12 @@ class SkewedChi2Sampler(TransformerMixin, BaseEstimator): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + See Also -------- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. @@ -520,6 +538,12 @@ class AdditiveChi2Sampler(TransformerMixin, BaseEstimator): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. 
Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + See Also -------- SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of @@ -764,6 +788,12 @@ class Nystroem(TransformerMixin, BaseEstimator): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + See Also -------- AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 008bdee7e646b..6c2d58266260c 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -327,7 +327,6 @@ def test_check_n_features_in_after_fitting(estimator): "compose", "ensemble", "feature_extraction", - "kernel_approximation", "model_selection", "multiclass", "multioutput", diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index cf88785807d06..7a51ffa0b98b7 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3722,6 +3722,7 @@ def check_dataframe_column_names_consistency(name, estimator_orig): set_random_state(estimator) X_orig = rng.normal(size=(150, 8)) + X_orig -= X_orig.min() + 0.5 X_orig = _enforce_estimator_tags_x(estimator, X_orig) X_orig = _pairwise_estimator_convert_X(X_orig, estimator) n_samples, n_features = X_orig.shape From f5b4c6d45c9339cb4f5dc87892fdaa072d794057 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 25 Aug 2021 19:47:10 -0400 Subject: [PATCH 2/4] TST Adds doctest --- sklearn/utils/estimator_checks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 7a51ffa0b98b7..007e94952ae49 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3744,6 +3744,17 @@ def check_dataframe_column_names_consistency(name, estimator_orig): ) assert_array_equal(estimator.feature_names_in_, names) + # Only check sklearn estimators for feature_names_in_ in docstring + module_name = estimator_orig.__module__ + if ( + module_name.startswith("sklearn.") + and not ("test_" in module_name or module_name.endswith("_testing")) + and ("feature_names_in_" not in (estimator.__doc__)) + ): + raise ValueError( # pragma: nocover + f"Estimator {name} does not document its feature_names_in_ attribute" + ) + check_methods = [] for method in ( "predict", From 4029a3ab2424528f1feb8669fd2e18ddada68e61 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 25 Aug 2021 19:52:36 -0400 Subject: [PATCH 3/4] REV Remove --- sklearn/utils/estimator_checks.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 007e94952ae49..7a51ffa0b98b7 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3744,17 +3744,6 @@ def check_dataframe_column_names_consistency(name, estimator_orig): ) assert_array_equal(estimator.feature_names_in_, names) - # Only check sklearn estimators for feature_names_in_ in docstring - module_name = estimator_orig.__module__ - if ( - module_name.startswith("sklearn.") - and not ("test_" in module_name or module_name.endswith("_testing")) - and ("feature_names_in_" not in (estimator.__doc__)) - ): - raise ValueError( # pragma: nocover - f"Estimator {name} does not document its feature_names_in_ attribute" - ) - check_methods = [] for method in ( "predict", From de0daeec38cd749968d0391f204158f1a09d6126 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Aug 2021 17:19:35 +0200 Subject: [PATCH 4/4] Add comment --- sklearn/utils/estimator_checks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 7a51ffa0b98b7..5d09b265ee6fc 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3722,6 +3722,8 @@ def check_dataframe_column_names_consistency(name, estimator_orig): set_random_state(estimator) X_orig = rng.normal(size=(150, 8)) + + # Shift so min(X) == -0.5: e.g. SkewedChi2Sampler rejects X <= -skewedness. X_orig -= X_orig.min() + 0.5 X_orig = _enforce_estimator_tags_x(estimator, X_orig) X_orig = _pairwise_estimator_convert_X(X_orig, estimator)