From 00d52b075b023e9fe484e199a9a4d5b13f931af8 Mon Sep 17 00:00:00 2001 From: DatenBiene Date: Thu, 30 Jan 2020 15:43:45 +0100 Subject: [PATCH 1/5] Update random_state doc for mutual_info.py, unsupervised.py, testing.py and utils/init.py --- sklearn/feature_selection/_mutual_info.py | 22 ++++++++-------------- sklearn/metrics/cluster/_unsupervised.py | 9 ++++----- sklearn/utils/__init__.py | 14 ++++++-------- sklearn/utils/_testing.py | 7 +++---- 4 files changed, 21 insertions(+), 31 deletions(-) diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index 95d1aeb183a27..a6f7bc48b40fa 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -225,11 +225,9 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator for adding small noise - to continuous variables in order to remove repeated values. If int, - random_state is the seed used by the random number generator; If - RandomState instance, random_state is the random number generator; If - None, the random number generator is the RandomState instance used by - `np.random`. + to continuous variables in order to remove repeated values. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary <random_state>`. Returns ------- @@ -335,10 +333,8 @@ def mutual_info_regression(X, y, discrete_features='auto', n_neighbors=3, random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator for adding small noise to continuous variables in order to remove repeated values. - If int, random_state is the seed used by the random number generator; - If RandomState instance, random_state is the random number generator; - If None, the random number generator is the RandomState instance used - by `np.random`. 
+ Pass an int for reproducible results across multiple function calls. + See :term:`Glossary <random_state>`. Returns ------- @@ -413,11 +409,9 @@ def mutual_info_classif(X, y, discrete_features='auto', n_neighbors=3, random_state : int, RandomState instance or None, optional, default None The seed of the pseudo random number generator for adding small noise - to continuous variables in order to remove repeated values. If int, - random_state is the seed used by the random number generator; If - RandomState instance, random_state is the random number generator; If - None, the random number generator is the RandomState instance used by - `np.random`. + to continuous variables in order to remove repeated values. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary <random_state>`. Returns ------- diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index d6fc6fbc82ab0..dee0cb8f0fb29 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -78,11 +78,10 @@ def silhouette_score(X, labels, metric='euclidean', sample_size=None, If ``sample_size is None``, no sampling is used. random_state : int, RandomState instance or None, optional (default=None) - The generator used to randomly select a subset of samples. If int, - random_state is the seed used by the random number generator; If - RandomState instance, random_state is the random number generator; If - None, the random number generator is the RandomState instance used by - `np.random`. Used when ``sample_size is not None``. + The generator used to randomly select a subset of samples. + Used when ``sample_size is not None``. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary <random_state>`. **kwds : optional keyword parameters Any further parameters are passed directly to the distance function. 
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 5e7605e97f949..f12cd9df36d94 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -477,10 +477,9 @@ def resample(*arrays, **options): random_state : int, RandomState instance or None, optional (default=None) The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. + the data. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary <random_state>`. stratify : array-like or None (default=None) If not None, data is split in a stratified fashion, using this as @@ -622,10 +621,9 @@ def shuffle(*arrays, **options): ---------------- random_state : int, RandomState instance or None, optional (default=None) The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. + the data. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary <random_state>`. n_samples : int, None by default Number of samples to generate. If left to None this is diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index b4a747b1df7ce..931622b3889db 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -519,10 +519,9 @@ def set_random_state(estimator, random_state=0): estimator : object The estimator random_state : int, RandomState instance or None, optional, default=0 - Pseudo random number generator state. 
If int, random_state is the seed - used by the random number generator; If RandomState instance, - random_state is the random number generator; If None, the random number - generator is the RandomState instance used by `np.random`. + Pseudo random number generator state. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary <random_state>`. """ if "random_state" in estimator.get_params(): estimator.set_params(random_state=random_state) From 110aa694948965fa51eb37bf4eb1b9808a37c164 Mon Sep 17 00:00:00 2001 From: DatenBiene Date: Thu, 30 Jan 2020 17:05:45 +0100 Subject: [PATCH 2/5] Update comments from pr --- sklearn/feature_selection/_mutual_info.py | 12 ++++++------ sklearn/utils/__init__.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index a6f7bc48b40fa..5931e7bbc6ef5 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -224,8 +224,8 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False, data will be overwritten. random_state : int, RandomState instance or None, optional, default None - The seed of the pseudo random number generator for adding small noise - to continuous variables in order to remove repeated values. + Determines random number generation for adding small noise to + continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. @@ -331,8 +331,8 @@ def mutual_info_regression(X, y, discrete_features='auto', n_neighbors=3, data will be overwritten. random_state : int, RandomState instance or None, optional, default None - The seed of the pseudo random number generator for adding small noise - to continuous variables in order to remove repeated values. 
+ Determines random number generation for adding small noise to + continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. @@ -408,8 +408,8 @@ def mutual_info_classif(X, y, discrete_features='auto', n_neighbors=3, data will be overwritten. random_state : int, RandomState instance or None, optional, default None - The seed of the pseudo random number generator for adding small noise - to continuous variables in order to remove repeated values. + Determines random number generation for adding small noise to + continuous variables in order to remove repeated values. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index f12cd9df36d94..91dd8395d42ba 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -476,7 +476,7 @@ def resample(*arrays, **options): arrays. random_state : int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling + Determines the pseudo random number generator to use when shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. @@ -620,7 +620,7 @@ def shuffle(*arrays, **options): Other Parameters ---------------- random_state : int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling + Determines the pseudo random number generator to use when shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. 
From d06973c9f82f3fd0976fce3ad7c22db566338a00 Mon Sep 17 00:00:00 2001 From: Pierre Delanoue Date: Fri, 31 Jan 2020 16:20:28 +0100 Subject: [PATCH 3/5] Update sklearn/metrics/cluster/_unsupervised.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com> --- sklearn/metrics/cluster/_unsupervised.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index dee0cb8f0fb29..a0eaa5e84240a 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -78,7 +78,7 @@ def silhouette_score(X, labels, metric='euclidean', sample_size=None, If ``sample_size is None``, no sampling is used. random_state : int, RandomState instance or None, optional (default=None) - The generator used to randomly select a subset of samples. + Determines random number generation for selecting a subset of samples. Used when ``sample_size is not None``. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. 
From b6e124ba10fef8486e925b398f8f1a26c1b52300 Mon Sep 17 00:00:00 2001 From: Pierre Delanoue Date: Fri, 31 Jan 2020 16:20:35 +0100 Subject: [PATCH 4/5] Update sklearn/utils/__init__.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com> --- sklearn/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 91dd8395d42ba..c28edd20b846d 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -620,7 +620,7 @@ def shuffle(*arrays, **options): Other Parameters ---------------- random_state : int, RandomState instance or None, optional (default=None) - Determines the pseudo random number generator to use when shuffling + Determines random number generation for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. From 5d3d9afab4d43751c4eb9547dc18b5a9ec4228e8 Mon Sep 17 00:00:00 2001 From: Pierre Delanoue Date: Fri, 31 Jan 2020 16:20:44 +0100 Subject: [PATCH 5/5] Update sklearn/utils/__init__.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Jérémie du Boisberranger <34657725+jeremiedbb@users.noreply.github.com> --- sklearn/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index c28edd20b846d..6742411148b9b 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -476,7 +476,7 @@ def resample(*arrays, **options): arrays. random_state : int, RandomState instance or None, optional (default=None) - Determines the pseudo random number generator to use when shuffling + Determines random number generation for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`.