From 8baad8b0c9d804db82aa7dc86f149750b9fcb93f Mon Sep 17 00:00:00 2001 From: Sangam <35230623+SangamSwadiK@users.noreply.github.com> Date: Fri, 10 Jun 2022 17:15:47 +0530 Subject: [PATCH 1/6] Remove variance threshold Remove variance threshold from to be validated --- sklearn/tests/test_common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 17ad851a3d7bd..ad27c0cc069fa 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -634,7 +634,6 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator): "TransformedTargetRegressor", "TruncatedSVD", "TweedieRegressor", - "VarianceThreshold", "VotingClassifier", "VotingRegressor", ] From 17be241e05158543aebaa62b5342d0d40f600320 Mon Sep 17 00:00:00 2001 From: Sangam <35230623+SangamSwadiK@users.noreply.github.com> Date: Fri, 10 Jun 2022 17:32:34 +0530 Subject: [PATCH 2/6] Add validation for variance threshold Add parameter validation for variance threshold. --- sklearn/feature_selection/_variance_threshold.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 7f274b3a308ef..0007054a9f899 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -2,11 +2,12 @@ # License: 3-clause BSD import numpy as np +from numbers import Real from ..base import BaseEstimator from ._base import SelectorMixin from ..utils.sparsefuncs import mean_variance_axis, min_max_axis from ..utils.validation import check_is_fitted - +from ..utils._param_validation import Interval class VarianceThreshold(SelectorMixin, BaseEstimator): """Feature selector that removes all low-variance features. @@ -66,6 +67,7 @@ class VarianceThreshold(SelectorMixin, BaseEstimator): [1, 4], [1, 1]]) """ + _parameter_constraints = {"threshold": [Interval(Real, 0, 1, closed="both")]} def __init__(self, threshold=0.0): self.threshold = threshold @@ -88,6 +90,8 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ + self._validate_params() + X = self._validate_data( X, accept_sparse=("csr", "csc"), From b507986bedd25e8194cb9a6af7ba71134a3f32e9 Mon Sep 17 00:00:00 2001 From: Sangam Swadi K Date: Fri, 10 Jun 2022 17:43:48 +0530 Subject: [PATCH 3/6] fix linting issues --- sklearn/feature_selection/_variance_threshold.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 0007054a9f899..fa7f478c85598 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -9,6 +9,7 @@ from ..utils.validation import check_is_fitted from ..utils._param_validation import Interval + class VarianceThreshold(SelectorMixin, BaseEstimator): """Feature selector that removes all low-variance features. @@ -67,7 +68,8 @@ class VarianceThreshold(SelectorMixin, BaseEstimator): [1, 4], [1, 1]]) """ - _parameter_constraints = {"threshold": [Interval(Real, 0, 1, closed="both")]} + + _parameter_constraints = {"threshold": [Interval(Real, 0, 1, closed="both")]} def __init__(self, threshold=0.0): self.threshold = threshold @@ -91,7 +93,6 @@ def fit(self, X, y=None): Returns the instance itself. """ self._validate_params() - X = self._validate_data( X, accept_sparse=("csr", "csc"), From 95348154e0d4b250cd1be72cc0bf66ab6de7f676 Mon Sep 17 00:00:00 2001 From: Sangam Swadi K Date: Fri, 10 Jun 2022 18:41:25 +0530 Subject: [PATCH 4/6] changed interval --- sklearn/feature_selection/_variance_threshold.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index fa7f478c85598..30a7f48da33b8 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -69,7 +69,7 @@ class VarianceThreshold(SelectorMixin, BaseEstimator): [1, 1]]) """ - _parameter_constraints = {"threshold": [Interval(Real, 0, 1, closed="both")]} + _parameter_constraints = {"threshold": [Interval(Real, 0, None, closed="left")]} def __init__(self, threshold=0.0): self.threshold = threshold From 24d571fae6743efa0b9d181cd7f4b7d871a92075 Mon Sep 17 00:00:00 2001 From: Sangam Swadi K Date: Fri, 10 Jun 2022 22:39:37 +0530 Subject: [PATCH 5/6] regroup standard imports and remove check for negative threshold --- sklearn/feature_selection/_variance_threshold.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 30a7f48da33b8..18cd1b4ae4603 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -1,8 +1,8 @@ # Author: Lars Buitinck # License: 3-clause BSD +from numbers import Real import numpy as np -from numbers import Real from ..base import BaseEstimator from ._base import SelectorMixin from ..utils.sparsefuncs import mean_variance_axis, min_max_axis @@ -115,8 +115,6 @@ def fit(self, X, y=None): # for constant features compare_arr = np.array([self.variances_, peak_to_peaks]) self.variances_ = np.nanmin(compare_arr, axis=0) - elif self.threshold < 0.0: - raise ValueError(f"Threshold must be non-negative. Got: {self.threshold}") if np.all(~np.isfinite(self.variances_) | (self.variances_ <= self.threshold)): msg = "No feature in X meets the variance threshold {0:.5f}" From 964fc83fdc0d937e0a1b5da256a3a570e28dd373 Mon Sep 17 00:00:00 2001 From: Sangam Swadi K Date: Fri, 10 Jun 2022 22:42:55 +0530 Subject: [PATCH 6/6] remove test for negative variance --- .../feature_selection/tests/test_variance_threshold.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 55d20e9675654..493e9e58df7bd 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -32,15 +32,6 @@ def test_variance_threshold(): assert (len(data), 1) == X.shape -@pytest.mark.parametrize("X", [data, csr_matrix(data)]) -def test_variance_negative(X): - """Test VarianceThreshold with negative variance.""" - var_threshold = VarianceThreshold(threshold=-1.0) - msg = r"^Threshold must be non-negative. Got: -1.0$" - with pytest.raises(ValueError, match=msg): - var_threshold.fit(X) - - @pytest.mark.skipif( np.var(data2) == 0, reason=(