From 7e54736e5402f89e70d5454e1f9490e1fc2faae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= Date: Sat, 18 Mar 2023 13:21:44 +0100 Subject: [PATCH 1/2] add parameters validation for datasets.make_regression --- sklearn/datasets/_samples_generator.py | 15 +++++++++++++++ sklearn/tests/test_public_functions.py | 1 + 2 files changed, 16 insertions(+) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index a3495b358354f..f1d4a6dde1a63 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -523,6 +523,21 @@ def make_hastie_10_2(n_samples=12000, *, random_state=None): return X, y +@validate_params( + { + "n_samples": [Interval(Integral, 1, None, closed="left")], + "n_features": [Interval(Integral, 1, None, closed="left")], + "n_informative": [Interval(Integral, 1, None, closed="left")], + "n_targets": [Interval(Integral, 1, None, closed="left")], + "bias": [Interval(Real, None, None, closed="neither")], + "effective_rank": [Interval(Integral, 1, None, closed="left"), None], + "tail_strength": [Interval(Real, 0, 1, closed="both")], + "noise": [Interval(Real, 0, None, closed="left")], + "shuffle": ["boolean"], + "coef": ["boolean"], + "random_state": ["random_state"], + } +) def make_regression( n_samples=100, n_features=100, diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index f72d622e53902..deaf596a4032f 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -132,6 +132,7 @@ def _check_function_param_validation( "sklearn.datasets.make_circles", "sklearn.datasets.make_classification", "sklearn.datasets.make_friedman1", + "sklearn.datasets.make_regression", "sklearn.datasets.make_sparse_coded_signal", "sklearn.decomposition.sparse_encode", "sklearn.feature_extraction.grid_to_graph", From 8c1f1603ac03b46fba203855fc8c5e65375af9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Baranger?= <39696928+tbaranger@users.noreply.github.com> Date: Tue, 21 Mar 2023 13:42:26 +0100 Subject: [PATCH 2/2] lower minimum value for parameter n_informative from 1 to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com> --- sklearn/datasets/_samples_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index f1d4a6dde1a63..829352f6eae49 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -527,7 +527,7 @@ def make_hastie_10_2(n_samples=12000, *, random_state=None): { "n_samples": [Interval(Integral, 1, None, closed="left")], "n_features": [Interval(Integral, 1, None, closed="left")], - "n_informative": [Interval(Integral, 1, None, closed="left")], + "n_informative": [Interval(Integral, 0, None, closed="left")], "n_targets": [Interval(Integral, 1, None, closed="left")], "bias": [Interval(Real, None, None, closed="neither")], "effective_rank": [Interval(Integral, 1, None, closed="left"), None],