From 57bf11488eaa52171a6e081f982695b2842e006d Mon Sep 17 00:00:00 2001 From: nicolafan <48762613+nicolafan@users.noreply.github.com> Date: Tue, 24 Jan 2023 20:48:08 +0100 Subject: [PATCH 1/6] Add parameters validation for make_classification --- sklearn/datasets/_samples_generator.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index 59c32125b6ff3..318fb6dc2849a 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -6,7 +6,7 @@ # G. Louppe, J. Nothman # License: BSD 3 clause -from numbers import Integral +from numbers import Integral, Real import numbers import array import warnings @@ -39,6 +39,24 @@ def _generate_hypercube(samples, dimensions, rng): return out +@validate_params( + { + "n_samples": [Interval(Integral, 1, None, closed="left")], + "n_features": [Interval(Integral, 1, None, closed="left")], + "n_informative": [Interval(Integral, 1, None, closed="left")], + "n_redundant": [Interval(Integral, 0, None, closed="left")], + "n_repeated": [Interval(Integral, 0, None, closed="left")], + "n_clusters_per_class": [Interval(Integral, 1, None, closed="left")], + "weights": ["array-like", None], + "flip_y": [Interval(Real, 0, 1, closed="both")], + "class_sep": [Interval(Real, None, None, closed="neither")], + "hypercube": ["boolean"], + "shift": [Interval(Real, None, None, closed="neither"), None], + "scale": [Interval(Real, None, None, closed="neither"), None], + "shuffle": ["boolean"], + "random_state": ["random_state"], + } +) def make_classification( n_samples=100, n_features=20, From e23f343c7b4dd85d9208ea699005489db5f68ff3 Mon Sep 17 00:00:00 2001 From: nicolafan <48762613+nicolafan@users.noreply.github.com> Date: Tue, 24 Jan 2023 20:54:25 +0100 Subject: [PATCH 2/6] Add make_classification to common param val test --- sklearn/tests/test_public_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index acc44ce60c755..e51128c8bb7ed 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -102,6 +102,7 @@ def _check_function_param_validation( "sklearn.covariance.empirical_covariance", "sklearn.covariance.shrunk_covariance", "sklearn.datasets.fetch_california_housing", + "sklearn.datasets.make_classification", "sklearn.datasets.make_sparse_coded_signal", "sklearn.decomposition.sparse_encode", "sklearn.feature_extraction.grid_to_graph", From a355a69fb6cae1da1c7647d55cda763c4a0a23c3 Mon Sep 17 00:00:00 2001 From: nicolafan <48762613+nicolafan@users.noreply.github.com> Date: Tue, 24 Jan 2023 21:00:15 +0100 Subject: [PATCH 3/6] Add n_classes to validate_params --- sklearn/datasets/_samples_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index 318fb6dc2849a..a92adcb3f0f4a 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -46,6 +46,7 @@ def _generate_hypercube(samples, dimensions, rng): "n_informative": [Interval(Integral, 1, None, closed="left")], "n_redundant": [Interval(Integral, 0, None, closed="left")], "n_repeated": [Interval(Integral, 0, None, closed="left")], + "n_classes": [Interval(Integral, 1, None, closed="left")], "n_clusters_per_class": [Interval(Integral, 1, None, closed="left")], "weights": ["array-like", None], "flip_y": [Interval(Real, 0, 1, closed="both")], From f1370d4382153129f9f0c366460d302b6f3832fa Mon Sep 17 00:00:00 2001 From: Nicola Fanelli <48762613+nicolafan@users.noreply.github.com> Date: Wed, 25 Jan 2023 14:23:45 +0100 Subject: [PATCH 4/6] Add array type to validation of shift and scale Co-authored-by: Guillaume Lemaitre --- sklearn/datasets/_samples_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index a92adcb3f0f4a..d9532da95988a 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -52,8 +52,8 @@ def _generate_hypercube(samples, dimensions, rng): "flip_y": [Interval(Real, 0, 1, closed="both")], "class_sep": [Interval(Real, None, None, closed="neither")], "hypercube": ["boolean"], - "shift": [Interval(Real, None, None, closed="neither"), None], - "scale": [Interval(Real, None, None, closed="neither"), None], + "shift": [Interval(Real, None, None, closed="neither"), "array-like", None], + "scale": [Interval(Real, None, None, closed="neither"), "array-like", None], "shuffle": ["boolean"], "random_state": ["random_state"], } From a116f73afebdf456c4e0cd012f84a4903feff9ac Mon Sep 17 00:00:00 2001 From: nicolafan <48762613+nicolafan@users.noreply.github.com> Date: Wed, 25 Jan 2023 14:56:07 +0100 Subject: [PATCH 5/6] Require positive values for class_sep --- sklearn/datasets/_samples_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index d9532da95988a..3d9e159af07c0 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -50,7 +50,7 @@ def _generate_hypercube(samples, dimensions, rng): "n_clusters_per_class": [Interval(Integral, 1, None, closed="left")], "weights": ["array-like", None], "flip_y": [Interval(Real, 0, 1, closed="both")], - "class_sep": [Interval(Real, None, None, closed="neither")], + "class_sep": [Interval(Real, 0, None, closed="neither")], "hypercube": ["boolean"], "shift": [Interval(Real, None, None, closed="neither"), "array-like", None], "scale": [Interval(Real, None, None, closed="neither"), "array-like", None], From 59fef7712aa3f2655468a7e02c713fec1e228d8b Mon Sep 17 00:00:00 2001 From: nicolafan <48762613+nicolafan@users.noreply.github.com> Date: Fri, 27 Jan 2023 16:42:23 +0100 Subject: [PATCH 6/6] Require positive values for scale --- sklearn/datasets/_samples_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index 3d9e159af07c0..ffe81d9c13a18 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -53,7 +53,7 @@ def _generate_hypercube(samples, dimensions, rng): "class_sep": [Interval(Real, 0, None, closed="neither")], "hypercube": ["boolean"], "shift": [Interval(Real, None, None, closed="neither"), "array-like", None], - "scale": [Interval(Real, None, None, closed="neither"), "array-like", None], + "scale": [Interval(Real, 0, None, closed="neither"), "array-like", None], "shuffle": ["boolean"], "random_state": ["random_state"], }