From b417fa155b79a6bfb2cd4de6a698e177b565e223 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sat, 7 May 2022 15:14:33 +0900
Subject: [PATCH 01/12] add parameters validation for partial_fit

---
 sklearn/feature_selection/_from_model.py | 32 +++++++++++
 .../tests/test_from_model.py             | 55 +++++++++++++++++++
 2 files changed, 87 insertions(+)

diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index 4868c8a7e86eb..d70b8fe15c3aa 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -385,6 +385,32 @@ def partial_fit(self, X, y=None, **fit_params):
         self : object
             Fitted estimator.
         """
+        if self.max_features is not None:
+            if isinstance(self.max_features, numbers.Integral):
+                check_scalar(
+                    self.max_features,
+                    "max_features",
+                    numbers.Integral,
+                    min_val=0,
+                    max_val=len(X[0]),
+                )
+                self.max_features_ = self.max_features
+            elif callable(self.max_features):
+                max_features = self.max_features(X)
+                check_scalar(
+                    max_features,
+                    "max_features(X)",
+                    numbers.Integral,
+                    min_val=0,
+                    max_val=len(X[0]),
+                )
+                self.max_features_ = max_features
+            else:
+                raise TypeError(
+                    "'max_features' must be either an int or a callable that takes"
+                    f" 'X' as input. Got {self.max_features} instead."
+                )
+
         if self.prefit:
             if not hasattr(self, "estimator_"):
                 try:
@@ -400,6 +426,12 @@ def partial_fit(self, X, y=None, **fit_params):
         if not hasattr(self, "estimator_"):
             self.estimator_ = clone(self.estimator)
         self.estimator_.partial_fit(X, y, **fit_params)
+
+        if hasattr(self.estimator_, "feature_names_in_"):
+            self.feature_names_in_ = self.estimator_.feature_names_in_
+        else:
+            self._check_feature_names(X, reset=True)
+
         return self
 
     @property
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index a9681635b8ddb..7bff7cb870f30 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -607,3 +607,58 @@ def importance_getter(estimator):
         warnings.simplefilter("error", UserWarning)
         selector.transform(X.iloc[1:3])
+
+
+def test_partial_fit_validate_max_features():
+    """Test that partial_fit from SelectFromModel validates `max_features`."""
+    X, y = datasets.make_classification(
+        n_samples=100,
+        n_features=4,
+        random_state=0,
+    )
+
+    # Case 1: Check if partial_fit raises ValueError when the value of
+    # `max_features` is out of the boundaries
+    err_msg = "max_features == 10, must be <= 4."
+    with pytest.raises(ValueError, match=err_msg):
+        SelectFromModel(
+            estimator=SGDClassifier(),
+            max_features=10
+        ).partial_fit(X, y, classes=[0, 1])
+
+    # Case 2: Check if partial_fit raises TypeError when `max_features`
+    # is neither an int nor a callable
+    err_msg = "'max_features' must be either an int or a callable"
+    with pytest.raises(TypeError, match=err_msg):
+        SelectFromModel(
+            estimator=SGDClassifier(),
+            max_features="a"
+        ).partial_fit(X, y, classes=[0, 1])
+
+
+def test_partial_fit_validate_feature_names():
+    """Test that partial_fit from SelectFromModel validates `feature_names_in_`."""
+    # Case 1: Check if `feature_names_in_` is defined only when `X` has
+    # feature names that are all strings.
+    X, y = datasets.make_classification(
+        n_samples=100,
+        n_features=4,
+        random_state=0,
+    )
+
+    selector = SelectFromModel(
+        estimator=SGDClassifier(),
+        max_features=4
+    ).partial_fit(X, y, classes=[0, 1])
+    assert not hasattr(selector.estimator_, "feature_names_in_")
+
+    # Case 2: Check if `feature_names_in_` is defined only when `X`
+    # has feature names that are all strings.
+    pytest.importorskip("pandas")
+    X, y = datasets.load_iris(as_frame=True, return_X_y=True)
+
+    selector = SelectFromModel(
+        estimator=SGDClassifier(),
+        max_features=4
+    ).partial_fit(X, y, classes=[0, 1])
+    assert_array_equal(selector.feature_names_in_, X.columns)

From ea81a2b65307006fe0bf32360d116b8d74b0623a Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sat, 7 May 2022 15:31:30 +0900
Subject: [PATCH 02/12] fix style

---
 .../tests/test_from_model.py | 28 ++++++++-----------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 7bff7cb870f30..2b81093507d02 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -621,19 +621,17 @@ def test_partial_fit_validate_max_features():
     # `max_features` is out of the boundaries
     err_msg = "max_features == 10, must be <= 4."
     with pytest.raises(ValueError, match=err_msg):
-        SelectFromModel(
-            estimator=SGDClassifier(),
-            max_features=10
-        ).partial_fit(X, y, classes=[0, 1])
+        SelectFromModel(estimator=SGDClassifier(), max_features=10).partial_fit(
+            X, y, classes=[0, 1]
+        )
 
     # Case 2: Check if partial_fit raises TypeError when `max_features`
     # is neither an int nor a callable
     err_msg = "'max_features' must be either an int or a callable"
     with pytest.raises(TypeError, match=err_msg):
-        SelectFromModel(
-            estimator=SGDClassifier(),
-            max_features="a"
-        ).partial_fit(X, y, classes=[0, 1])
+        SelectFromModel(estimator=SGDClassifier(), max_features="a").partial_fit(
+            X, y, classes=[0, 1]
+        )
 
 
 def test_partial_fit_validate_feature_names():
@@ -646,10 +644,9 @@ def test_partial_fit_validate_feature_names():
         random_state=0,
     )
 
-    selector = SelectFromModel(
-        estimator=SGDClassifier(),
-        max_features=4
-    ).partial_fit(X, y, classes=[0, 1])
+    selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
+        X, y, classes=[0, 1]
+    )
     assert not hasattr(selector.estimator_, "feature_names_in_")
 
     # Case 2: Check if `feature_names_in_` is defined only when `X`
     # has feature names that are all strings.
     pytest.importorskip("pandas")
     X, y = datasets.load_iris(as_frame=True, return_X_y=True)
 
-    selector = SelectFromModel(
-        estimator=SGDClassifier(),
-        max_features=4
-    ).partial_fit(X, y, classes=[0, 1])
+    selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
+        X, y, classes=[0, 1]
+    )
     assert_array_equal(selector.feature_names_in_, X.columns)

From f08a8a5c6bf9ceed6bab91288a59894f4d2064f1 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sat, 7 May 2022 16:21:52 +0900
Subject: [PATCH 03/12] fix max_val error when input is dataframe

---
 sklearn/feature_selection/_from_model.py       | 18 ++++++++++++++----
 .../feature_selection/tests/test_from_model.py |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index d70b8fe15c3aa..4043083bc3840 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -308,13 +308,18 @@ def fit(self, X, y=None, **fit_params):
             Fitted estimator.
         """
         if self.max_features is not None:
+            if hasattr(X, "shape"):
+                n_features = X.shape[1]
+            else:
+                n_features = len(X[0])
+
             if isinstance(self.max_features, numbers.Integral):
                 check_scalar(
                     self.max_features,
                     "max_features",
                     numbers.Integral,
                     min_val=0,
-                    max_val=len(X[0]),
+                    max_val=n_features,
                 )
                 self.max_features_ = self.max_features
             elif callable(self.max_features):
@@ -324,7 +329,7 @@ def fit(self, X, y=None, **fit_params):
                     "max_features(X)",
                     numbers.Integral,
                     min_val=0,
-                    max_val=len(X[0]),
+                    max_val=n_features,
                 )
                 self.max_features_ = max_features
             else:
@@ -386,13 +391,18 @@ def partial_fit(self, X, y=None, **fit_params):
             Fitted estimator.
         """
         if self.max_features is not None:
+            if hasattr(X, "shape"):
+                n_features = X.shape[1]
+            else:
+                n_features = len(X[0])
+
             if isinstance(self.max_features, numbers.Integral):
                 check_scalar(
                     self.max_features,
                     "max_features",
                     numbers.Integral,
                     min_val=0,
-                    max_val=len(X[0]),
+                    max_val=n_features,
                 )
                 self.max_features_ = self.max_features
             elif callable(self.max_features):
@@ -402,7 +412,7 @@ def partial_fit(self, X, y=None, **fit_params):
                     "max_features(X)",
                     numbers.Integral,
                     min_val=0,
-                    max_val=len(X[0]),
+                    max_val=n_features,
                 )
                 self.max_features_ = max_features
             else:
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 2b81093507d02..cdf0669ba90c2 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -655,6 +655,6 @@ def test_partial_fit_validate_feature_names():
     X, y = datasets.load_iris(as_frame=True, return_X_y=True)
 
     selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
-        X, y, classes=[0, 1]
+        X, y, classes=[0, 1, 2]
     )
     assert_array_equal(selector.feature_names_in_, X.columns)

From 9be3c8fd98d34feac69e273ca65b8b62a1873328 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sat, 7 May 2022 17:23:33 +0900
Subject: [PATCH 04/12] add test cases

---
 .../tests/test_from_model.py | 49 +++++++++++--------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index cdf0669ba90c2..798eb5f7510ee 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -609,7 +609,15 @@ def importance_getter(estimator):
         selector.transform(X.iloc[1:3])
 
 
-def test_partial_fit_validate_max_features():
+@pytest.mark.parametrize(
+    "error, err_msg, max_features",
+    (
+        [ValueError, "max_features == 10, must be <= 4.", 10],
+        [TypeError, "'max_features' must be either an int or a callable", "a"],
+        [ValueError, "max_features\(X\) == 5, must be <= 4.", lambda x: x.shape[1] + 1],
+    ),
+)
+def test_partial_fit_validate_max_features(error, err_msg, max_features):
     """Test that partial_fit from SelectFromModel validates `max_features`."""
     X, y = datasets.make_classification(
         n_samples=100,
@@ -617,27 +625,15 @@ def test_partial_fit_validate_max_features(error, err_msg, max_features):
         random_state=0,
     )
 
-    # Case 1: Check if partial_fit raises ValueError when the value of
-    # `max_features` is out of the boundaries
-    err_msg = "max_features == 10, must be <= 4."
-    with pytest.raises(ValueError, match=err_msg):
-        SelectFromModel(estimator=SGDClassifier(), max_features=10).partial_fit(
-            X, y, classes=[0, 1]
-        )
-
-    # Case 2: Check if partial_fit raises TypeError when `max_features`
-    # is neither an int nor a callable
-    err_msg = "'max_features' must be either an int or a callable"
-    with pytest.raises(TypeError, match=err_msg):
-        SelectFromModel(estimator=SGDClassifier(), max_features="a").partial_fit(
-            X, y, classes=[0, 1]
-        )
+    with pytest.raises(error, match=err_msg):
+        SelectFromModel(
+            estimator=SGDClassifier(), max_features=max_features
+        ).partial_fit(X, y, classes=[0, 1])
 
 
 def test_partial_fit_validate_feature_names():
     """Test that partial_fit from SelectFromModel validates `feature_names_in_`."""
-    # Case 1: Check if `feature_names_in_` is defined only when `X` has
-    # feature names that are all strings.
+    # Case 1: Test array like data.
     X, y = datasets.make_classification(
         n_samples=100,
         n_features=4,
@@ -649,8 +645,21 @@ def test_partial_fit_validate_feature_names():
     selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
         X, y, classes=[0, 1]
     )
     assert not hasattr(selector.estimator_, "feature_names_in_")
 
-    # Case 2: Check if `feature_names_in_` is defined only when `X`
-    # has feature names that are all strings.
+    # Case 2: Test List data
+    X = [
+        [0.87, -1.34, 0.31, 0.11],
+        [-2.79, -0.02, -0.85, 3.11],
+        [-1.34, -0.48, -2.55, 2.13],
+        [1.92, 1.48, 0.65, -1.41],
+    ]
+    y = [0, 1, 0, 1]
+
+    selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
+        X, y, classes=[0, 1]
+    )
+    assert not hasattr(selector.estimator_, "feature_names_in_")
+
+    # Case 3: Test Frame data
     pytest.importorskip("pandas")
     X, y = datasets.load_iris(as_frame=True, return_X_y=True)

From 2918ba36676458c6459ca4e2a4b1ea197f48c698 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sat, 7 May 2022 17:28:54 +0900
Subject: [PATCH 05/12] fix style

---
 sklearn/feature_selection/tests/test_from_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 798eb5f7510ee..3fdee3d4b1801 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -612,9 +612,9 @@ def importance_getter(estimator):
 @pytest.mark.parametrize(
     "error, err_msg, max_features",
     (
-        [ValueError, "max_features == 10, must be <= 4.", 10],
+        [ValueError, "max_features == 10, must be <= 4", 10],
         [TypeError, "'max_features' must be either an int or a callable", "a"],
-        [ValueError, "max_features\(X\) == 5, must be <= 4.", lambda x: x.shape[1] + 1],
+        [ValueError, r"max_features\(X\) == 5, must be <= 4", lambda x: x.shape[1] + 1],
     ),
 )
 def test_partial_fit_validate_max_features(error, err_msg, max_features):

From dc7020d565850caa7447badd0a46fddb31866adb Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sat, 7 May 2022 21:29:55 +0900
Subject: [PATCH 06/12] add changelog

---
 doc/whats_new/v1.2.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
index e409461bccc77..0b093644c193c 100644
--- a/doc/whats_new/v1.2.rst
+++ b/doc/whats_new/v1.2.rst
@@ -50,6 +50,13 @@ Changelog
 - |Efficiency| Improve runtime performance of :class:`ensemble.IsolationForest`
   by avoiding data copies. :pr:`23252` by :user:`Zhehao Liu `.
 
+:mod:`sklearn.feature_selection`
+.......................
+
+- |Fix| The `partial_fit` method of :class:`feature_selection.SelectFromModel`
+  now conducts validation for `max_features` and `feature_names_in` parameters.
+  :pr:`23299` by :user:`Long Bao `.
+
 Code and Documentation Contributors
 -----------------------------------

From c6146d1a4f7ecfc3dc219a8983d53fc19c4a9410 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sat, 7 May 2022 21:56:15 +0900
Subject: [PATCH 07/12] fix changelog

---
 doc/whats_new/v1.2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
index 0b093644c193c..dab0bfbb60908 100644
--- a/doc/whats_new/v1.2.rst
+++ b/doc/whats_new/v1.2.rst
@@ -51,7 +51,7 @@ Changelog
   by avoiding data copies. :pr:`23252` by :user:`Zhehao Liu `.
 
 :mod:`sklearn.feature_selection`
-.......................
+................................
 
 - |Fix| The `partial_fit` method of :class:`feature_selection.SelectFromModel`
   now conducts validation for `max_features` and `feature_names_in` parameters.

From d1d81fe01f8a25f18240d9b5e2921a40ea77fb50 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sun, 8 May 2022 12:08:27 +0900
Subject: [PATCH 08/12] refactor code

---
 sklearn/feature_selection/_from_model.py | 87 +++++++------------
 .../tests/test_from_model.py             | 38 --------
 2 files changed, 29 insertions(+), 96 deletions(-)

diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index 4043083bc3840..0c41c66fbef1f 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -10,11 +10,10 @@
 from ._base import _get_feature_importances
 from ..base import BaseEstimator, clone, MetaEstimatorMixin
 from ..utils._tags import _safe_tags
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, check_scalar, _num_features
 from ..exceptions import NotFittedError
 
 from ..utils.metaestimators import available_if
-from ..utils.validation import check_scalar
 
 
 def _calculate_threshold(estimator, importances, threshold):
@@ -287,31 +286,9 @@ def _get_support_mask(self):
         mask[scores < threshold] = False
         return mask
 
-    def fit(self, X, y=None, **fit_params):
-        """Fit the SelectFromModel meta-transformer.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            The training input samples.
-
-        y : array-like of shape (n_samples,), default=None
-            The target values (integers that correspond to classes in
-            classification, real numbers in regression).
-
-        **fit_params : dict
-            Other estimator specific parameters.
-
-        Returns
-        -------
-        self : object
-            Fitted estimator.
-        """
+    def _check_max_features(self, X):
         if self.max_features is not None:
-            if hasattr(X, "shape"):
-                n_features = X.shape[1]
-            else:
-                n_features = len(X[0])
+            n_features = _num_features(X)
 
             if isinstance(self.max_features, numbers.Integral):
                 check_scalar(
@@ -338,6 +315,28 @@ def fit(self, X, y=None, **fit_params):
                     f" 'X' as input. Got {self.max_features} instead."
                 )
 
+    def fit(self, X, y=None, **fit_params):
+        """Fit the SelectFromModel meta-transformer.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The training input samples.
+
+        y : array-like of shape (n_samples,), default=None
+            The target values (integers that correspond to classes in
+            classification, real numbers in regression).
+
+        **fit_params : dict
+            Other estimator specific parameters.
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
+        """
+        self._check_max_features(X)
+
         if self.prefit:
             try:
                 check_is_fitted(self.estimator)
@@ -390,36 +389,7 @@ def partial_fit(self, X, y=None, **fit_params):
         self : object
             Fitted estimator.
         """
-        if self.max_features is not None:
-            if hasattr(X, "shape"):
-                n_features = X.shape[1]
-            else:
-                n_features = len(X[0])
-
-            if isinstance(self.max_features, numbers.Integral):
-                check_scalar(
-                    self.max_features,
-                    "max_features",
-                    numbers.Integral,
-                    min_val=0,
-                    max_val=n_features,
-                )
-                self.max_features_ = self.max_features
-            elif callable(self.max_features):
-                max_features = self.max_features(X)
-                check_scalar(
-                    max_features,
-                    "max_features(X)",
-                    numbers.Integral,
-                    min_val=0,
-                    max_val=n_features,
-                )
-                self.max_features_ = max_features
-            else:
-                raise TypeError(
-                    "'max_features' must be either an int or a callable that takes"
-                    f" 'X' as input. Got {self.max_features} instead."
-                )
+        self._check_max_features(X)
 
         if self.prefit:
             if not hasattr(self, "estimator_"):
@@ -433,14 +403,15 @@ def partial_fit(self, X, y=None, **fit_params):
                 self.estimator_ = deepcopy(self.estimator)
             return self
 
-        if not hasattr(self, "estimator_"):
+        first_call = not hasattr(self, "estimator_")
+        if first_call:
             self.estimator_ = clone(self.estimator)
         self.estimator_.partial_fit(X, y, **fit_params)
 
         if hasattr(self.estimator_, "feature_names_in_"):
             self.feature_names_in_ = self.estimator_.feature_names_in_
         else:
-            self._check_feature_names(X, reset=True)
+            self._check_feature_names(X, reset=first_call)
 
         return self
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 3fdee3d4b1801..c875ecd251d37 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -629,41 +629,3 @@ def test_partial_fit_validate_max_features(error, err_msg, max_features):
         SelectFromModel(
             estimator=SGDClassifier(), max_features=max_features
         ).partial_fit(X, y, classes=[0, 1])
-
-
-def test_partial_fit_validate_feature_names():
-    """Test that partial_fit from SelectFromModel validates `feature_names_in_`."""
-    # Case 1: Test array like data.
-    X, y = datasets.make_classification(
-        n_samples=100,
-        n_features=4,
-        random_state=0,
-    )
-
-    selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
-        X, y, classes=[0, 1]
-    )
-    assert not hasattr(selector.estimator_, "feature_names_in_")
-
-    # Case 2: Test List data
-    X = [
-        [0.87, -1.34, 0.31, 0.11],
-        [-2.79, -0.02, -0.85, 3.11],
-        [-1.34, -0.48, -2.55, 2.13],
-        [1.92, 1.48, 0.65, -1.41],
-    ]
-    y = [0, 1, 0, 1]
-
-    selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
-        X, y, classes=[0, 1]
-    )
-    assert not hasattr(selector.estimator_, "feature_names_in_")
-
-    # Case 3: Test Frame data
-    pytest.importorskip("pandas")
-    X, y = datasets.load_iris(as_frame=True, return_X_y=True)
-
-    selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
-        X, y, classes=[0, 1, 2]
-    )
-    assert_array_equal(selector.feature_names_in_, X.columns)

From 2f9775567af4be6f789bcdc2bd37cd3a3cbc4bba Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sun, 8 May 2022 14:18:37 +0900
Subject: [PATCH 09/12] fix codecov warns

---
 sklearn/utils/estimator_checks.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 86494b2ae2307..ba25427a27263 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -31,6 +31,7 @@
 from ..linear_model import LogisticRegression
 from ..linear_model import RANSACRegressor
 from ..linear_model import Ridge
+from ..linear_model import SGDRegressor
 
 from ..base import (
     clone,
@@ -44,6 +45,7 @@
 from ..metrics import accuracy_score, adjusted_rand_score, f1_score
 from ..random_projection import BaseRandomProjection
 from ..feature_selection import SelectKBest
+from ..feature_selection import SelectFromModel
 from ..pipeline import make_pipeline
 from ..exceptions import DataConversionWarning
 from ..exceptions import NotFittedError
@@ -389,6 +391,8 @@ def _construct_instance(Estimator):
             estimator = Estimator(LinearRegression())
         elif issubclass(Estimator, RegressorMixin):
             estimator = Estimator(Ridge())
+        elif issubclass(Estimator, SelectFromModel):
+            estimator = Estimator(SGDRegressor())
         else:
             estimator = Estimator(LogisticRegression(C=1))
     elif required_parameters in (["estimators"],):

From 5101820391b9c86cb5d9ea5a51dacedd1540edc2 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sun, 8 May 2022 15:10:59 +0900
Subject: [PATCH 10/12] fix condecov error

---
 .../feature_selection/tests/test_from_model.py | 15 +++++++++++++++
 sklearn/utils/estimator_checks.py              |  4 ----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index c875ecd251d37..de45d9e0ab6a4 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -629,3 +629,18 @@ def test_partial_fit_validate_max_features(error, err_msg, max_features):
         SelectFromModel(
             estimator=SGDClassifier(), max_features=max_features
         ).partial_fit(X, y, classes=[0, 1])
+
+
+@pytest.mark.parametrize("as_frame", [True, False])
+def test_partial_fit_validate_feature_names(as_frame):
+    """Test that partial_fit from SelectFromModel validates `feature_names_in_`."""
+    pytest.importorskip("pandas")
+    X, y = datasets.load_iris(as_frame=as_frame, return_X_y=True)
+
+    selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
+        X, y, classes=[0, 1, 2]
+    )
+    if as_frame:
+        assert_array_equal(selector.feature_names_in_, X.columns)
+    else:
+        assert not hasattr(selector, "feature_names_in_")
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index ba25427a27263..86494b2ae2307 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -31,7 +31,6 @@
 from ..linear_model import LogisticRegression
 from ..linear_model import RANSACRegressor
 from ..linear_model import Ridge
-from ..linear_model import SGDRegressor
 
 from ..base import (
     clone,
@@ -44,7 +44,6 @@
 from ..metrics import accuracy_score, adjusted_rand_score, f1_score
 from ..random_projection import BaseRandomProjection
 from ..feature_selection import SelectKBest
-from ..feature_selection import SelectFromModel
 from ..pipeline import make_pipeline
 from ..exceptions import DataConversionWarning
 from ..exceptions import NotFittedError
@@ -389,8 +389,6 @@ def _construct_instance(Estimator):
             estimator = Estimator(LinearRegression())
         elif issubclass(Estimator, RegressorMixin):
             estimator = Estimator(Ridge())
-        elif issubclass(Estimator, SelectFromModel):
-            estimator = Estimator(SGDRegressor())
         else:
             estimator = Estimator(LogisticRegression(C=1))
     elif required_parameters in (["estimators"],):

From e58b8b6aa04fba41f4c9a9cfa0161c6a9b4ea949 Mon Sep 17 00:00:00 2001
From: LongBao
Date: Sun, 8 May 2022 22:43:32 +0900
Subject: [PATCH 11/12] update common test

---
 sklearn/utils/estimator_checks.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 86494b2ae2307..7b3291e1ea983 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -31,6 +31,7 @@
 from ..linear_model import LogisticRegression
 from ..linear_model import RANSACRegressor
 from ..linear_model import Ridge
+from ..linear_model import SGDRegressor
 
 from ..base import (
     clone,
@@ -44,6 +45,7 @@
 from ..metrics import accuracy_score, adjusted_rand_score, f1_score
 from ..random_projection import BaseRandomProjection
 from ..feature_selection import SelectKBest
+from ..feature_selection import SelectFromModel
 from ..pipeline import make_pipeline
 from ..exceptions import DataConversionWarning
 from ..exceptions import NotFittedError
@@ -389,6 +391,8 @@ def _construct_instance(Estimator):
             estimator = Estimator(LinearRegression())
         elif issubclass(Estimator, RegressorMixin):
             estimator = Estimator(Ridge())
+        elif issubclass(Estimator, SelectFromModel):
+            estimator = Estimator(SGDRegressor(random_state=0))
         else:
             estimator = Estimator(LogisticRegression(C=1))
     elif required_parameters in (["estimators"],):

From c3d2f9e0a61b77993bdc7b500219b58742a2734e Mon Sep 17 00:00:00 2001
From: SELEE
Date: Mon, 9 May 2022 01:51:49 +0900
Subject: [PATCH 12/12] Update sklearn/utils/estimator_checks.py

Co-authored-by: Thomas J. Fan
---
 sklearn/utils/estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 7b3291e1ea983..33cd54a5cb4ac 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -392,6 +392,7 @@ def _construct_instance(Estimator):
         elif issubclass(Estimator, RegressorMixin):
             estimator = Estimator(Ridge())
         elif issubclass(Estimator, SelectFromModel):
+            # Increases coverage because SGDRegressor has partial_fit
             estimator = Estimator(SGDRegressor(random_state=0))
         else:
             estimator = Estimator(LogisticRegression(C=1))
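
The usage sketch below is not part of the patch series; it only restates, from the
user's side, the behaviour that the new tests above exercise, assuming a
scikit-learn build that includes these patches.

import pytest
from sklearn.datasets import load_iris, make_classification
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=100, n_features=4, random_state=0)

# partial_fit now validates max_features against the number of features in X.
with pytest.raises(ValueError, match="max_features == 10, must be <= 4"):
    SelectFromModel(estimator=SGDClassifier(), max_features=10).partial_fit(
        X, y, classes=[0, 1]
    )

# With a dataframe input, partial_fit records the column names in feature_names_in_.
X_df, y_df = load_iris(as_frame=True, return_X_y=True)
selector = SelectFromModel(estimator=SGDClassifier(), max_features=4).partial_fit(
    X_df, y_df, classes=[0, 1, 2]
)
print(selector.feature_names_in_)  # the iris column names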