From fe91aaf95cdbe4bb1e3400b44ad4ed965b424264 Mon Sep 17 00:00:00 2001 From: Maskani Filali Mohamed Date: Fri, 6 Apr 2018 13:02:00 -0400 Subject: [PATCH 1/8] Improve docstring --- sklearn/model_selection/_validation.py | 30 ++++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index ceddce37781ad..0ca778d3b721d 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -871,7 +871,20 @@ def _index_param_value(X, v, indices): def permutation_test_score(estimator, X, y, groups=None, cv=None, n_permutations=100, n_jobs=1, random_state=0, verbose=0, scoring=None): - """Evaluate the significance of a cross-validated score with permutations + """Evaluate the significance of a cross-validated score by permuting + the labels of the samples and computing the p-value against the null + hypothesis that the features and the labels are independent, meaning that + there is no difference between the classes. + + The p-value represents the fraction of randomized data sets where the + classifier would have had a larger error on the original data + than in the randomized one. + + A small p-value (under a threshold, like :math:`\alpha = 0.05`) gives enough + evidence to conclude that the classifier has not learned a random pattern + in the data. + + .. versionadded:: 0.9 Read more in the :ref:`User Guide `. @@ -953,14 +966,17 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, The best possible p-value is 1/(n_permutations + 1), the worst is 1.0. - Notes - ----- - This function implements Test 1 in: + References + ---------- - Ojala and Garriga. Permutation Tests for Studying Classifier - Performance. The Journal of Machine Learning Research (2010) - vol. 11 + * `"Permutation Tests for Studying Classifier Performance" + `_ + Ojala and Garriga - The Journal of Machine Learning Research (2010) + vol. 
11 + Notes + ----- + This function implements "Test 1" as described in the paper given above. """ X, y, groups = indexable(X, y, groups) From d839cbbb79867b8721b17569154222251ecfe4d6 Mon Sep 17 00:00:00 2001 From: Maskani Filali Mohamed Date: Fri, 6 Apr 2018 13:02:32 -0400 Subject: [PATCH 2/8] Change whats_new --- doc/whats_new/older_versions.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/older_versions.rst b/doc/whats_new/older_versions.rst index eeb672914f033..e9aa8e2825091 100644 --- a/doc/whats_new/older_versions.rst +++ b/doc/whats_new/older_versions.rst @@ -787,6 +787,8 @@ Changelog - :ref:`olivetti_faces` by `David Warde-Farley`_. +- :func:`model_selection.model_selection.permutation_test_score` by `Alexandre Gramfort`_. + API changes summary ------------------- @@ -877,7 +879,7 @@ People - 127 `Jake Vanderplas`_ - 120 `Mathieu Blondel`_ - 85 `Alexandre Passos`_ -- 67 `Alexandre Gramfort`_ +- 68 `Alexandre Gramfort`_ - 57 `Peter Prettenhofer`_ - 56 `Gilles Louppe`_ - 42 Robert Layton From 71bdd726c4adb68702f4749880b1c5fafc3a0a37 Mon Sep 17 00:00:00 2001 From: Maskani Filali Mohamed Date: Fri, 6 Apr 2018 13:34:15 -0400 Subject: [PATCH 3/8] Fix pep8 --- sklearn/model_selection/_validation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 0ca778d3b721d..cfb55db127bf8 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -880,9 +880,9 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, classifier would have had a larger error on the original data than in the randomized one. - A small p-value (under a threshold, like :math:`\alpha = 0.05`) gives enough - evidence to conclude that the classifier has not learned a random pattern - in the data. 
+ A small p-value (under a threshold, like :math:`\alpha = 0.05`) gives + enough evidence to conclude that the classifier has not learned a random + pattern in the data. .. versionadded:: 0.9 From 573d89170b58cf0e4263554165145c73e8da84f4 Mon Sep 17 00:00:00 2001 From: Aditi Gupta Date: Sat, 24 Aug 2019 12:06:55 -0400 Subject: [PATCH 4/8] Added documentation for permutation_test_score in the user guide. --- doc/modules/cross_validation.rst | 8 ++++++++ doc/whats_new/older_versions.rst | 4 +--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index e889515da0923..c4a527ccd34ee 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -235,6 +235,14 @@ Here is an example of ``cross_validate`` using a single metric:: >>> sorted(scores.keys()) ['estimator', 'fit_time', 'score_time', 'test_score', 'train_score'] +Cross-validation significance evaluation +---------------------------------------- + +Significance of cross validation scores can be evaluated using :func:`permutation_test_score` function. The function returns a p-value, which approximates the probability that the average cross-validation score would be obtained at by chance. + + +It also returns cross_validation scores for each permutation of y labels. It permutes the labels of the samples and computes the p-value against the null hypothesis that the features and the labels are independent, meaning that there is no difference between the classes. + Obtaining predictions by cross-validation ----------------------------------------- diff --git a/doc/whats_new/older_versions.rst b/doc/whats_new/older_versions.rst index e9aa8e2825091..eeb672914f033 100644 --- a/doc/whats_new/older_versions.rst +++ b/doc/whats_new/older_versions.rst @@ -787,8 +787,6 @@ Changelog - :ref:`olivetti_faces` by `David Warde-Farley`_. -- :func:`model_selection.model_selection.permutation_test_score` by `Alexandre Gramfort`_. 
- API changes summary ------------------- @@ -879,7 +877,7 @@ People - 127 `Jake Vanderplas`_ - 120 `Mathieu Blondel`_ - 85 `Alexandre Passos`_ -- 68 `Alexandre Gramfort`_ +- 67 `Alexandre Gramfort`_ - 57 `Peter Prettenhofer`_ - 56 `Gilles Louppe`_ - 42 Robert Layton From 096c3be3299e9b525b0a4106d821f0c40f53a825 Mon Sep 17 00:00:00 2001 From: Aditi Gupta Date: Sat, 24 Aug 2019 12:32:28 -0400 Subject: [PATCH 5/8] Wrapped user guide text to 79 char --- doc/modules/cross_validation.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index c4a527ccd34ee..ff1d61c6470ca 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -238,10 +238,16 @@ Here is an example of ``cross_validate`` using a single metric:: Cross-validation significance evaluation ---------------------------------------- -Significance of cross validation scores can be evaluated using :func:`permutation_test_score` function. The function returns a p-value, which approximates the probability that the average cross-validation score would be obtained at by chance. +Significance of cross validation scores can be evaluated using +:func:`permutation_test_score` function. The function returns a p-value, which +approximates the probability that the average cross-validation score would be +obtained at by chance. -It also returns cross_validation scores for each permutation of y labels. It permutes the labels of the samples and computes the p-value against the null hypothesis that the features and the labels are independent, meaning that there is no difference between the classes. +It also returns cross_validation scores for each permutation of y labels. It +permutes the labels of the samples and computes the p-value against the null +hypothesis that the features and the labels are independent, meaning that there +is no difference between the classes. 
Obtaining predictions by cross-validation From 201dc02787c79b4db997bb9601e9295324cc7eb0 Mon Sep 17 00:00:00 2001 From: Aditi Gupta Date: Sat, 24 Aug 2019 15:00:11 -0400 Subject: [PATCH 6/8] Moved user guide documentation for permutation_test_score to after section "Obtaining predictions by cross-validation" and modified function help text to follow PEP8. --- doc/modules/cross_validation.rst | 30 +++++++++++++------------- sklearn/model_selection/_validation.py | 5 +++-- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index ff1d61c6470ca..7b10f6a48408a 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -235,21 +235,6 @@ Here is an example of ``cross_validate`` using a single metric:: >>> sorted(scores.keys()) ['estimator', 'fit_time', 'score_time', 'test_score', 'train_score'] -Cross-validation significance evaluation ----------------------------------------- - -Significance of cross validation scores can be evaluated using -:func:`permutation_test_score` function. The function returns a p-value, which -approximates the probability that the average cross-validation score would be -obtained at by chance. - - -It also returns cross_validation scores for each permutation of y labels. It -permutes the labels of the samples and computes the p-value against the null -hypothesis that the features and the labels are independent, meaning that there -is no difference between the classes. - - Obtaining predictions by cross-validation ----------------------------------------- @@ -289,6 +274,21 @@ section. * :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, * :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. +Cross-validation significance evaluation +---------------------------------------- + +Significance of cross validation scores can be evaluated using +:func:`permutation_test_score` function. 
The function returns a p-value, which +approximates the probability that the average cross-validation score would be +obtained at by chance. + + +It also returns cross_validation scores for each permutation of y labels. It +permutes the labels of the samples and computes the p-value against the null +hypothesis that the features and the labels are independent, meaning that there +is no difference between the classes. + + Cross validation iterators ========================== diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index cfb55db127bf8..e190f415fac54 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -871,8 +871,9 @@ def _index_param_value(X, v, indices): def permutation_test_score(estimator, X, y, groups=None, cv=None, n_permutations=100, n_jobs=1, random_state=0, verbose=0, scoring=None): - """Evaluate the significance of a cross-validated score by permuting - the labels of the samples and computing the p-value against the null + """Evaluates the significance of a cross-validated score by permutations. + + Permutes labels and computes the p-value against the null hypothesis that the features and the labels are independent, meaning that there is no difference between the classes. From 5cf5ae7eb44fea31575f46ab6205388d3880a6fd Mon Sep 17 00:00:00 2001 From: Aditi Gupta Date: Sat, 24 Aug 2019 15:43:48 -0400 Subject: [PATCH 7/8] Fixed typos in the permutation_test_score documentation in user guide. --- doc/modules/cross_validation.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 7b10f6a48408a..6e3b90db1d0fd 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -277,13 +277,13 @@ section. 
Cross-validation significance evaluation ---------------------------------------- -Significance of cross validation scores can be evaluated using +Significance of cross validation scores can be evaluated using the :func:`permutation_test_score` function. The function returns a p-value, which approximates the probability that the average cross-validation score would be -obtained at by chance. +obtained by chance if the target is independent of the data. -It also returns cross_validation scores for each permutation of y labels. It +It also returns cross validation scores for each permutation of y labels. It permutes the labels of the samples and computes the p-value against the null hypothesis that the features and the labels are independent, meaning that there is no difference between the classes. From 5252b827ae520fce7014bf925ec4cd6b59f4b0aa Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 26 Aug 2019 11:13:12 -0400 Subject: [PATCH 8/8] Minor changes --- doc/modules/cross_validation.rst | 7 +++++-- sklearn/model_selection/_validation.py | 26 ++++++++++++-------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 152236b10e414..746c3f43d13f5 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -260,6 +260,7 @@ Here is an example of ``cross_validate`` using a single metric:: >>> sorted(scores.keys()) ['estimator', 'fit_time', 'score_time', 'test_score'] + Obtaining predictions by cross-validation ----------------------------------------- @@ -299,15 +300,17 @@ section. * :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, * :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. + +.. 
_cv_significance_evaluation: + Cross-validation significance evaluation ---------------------------------------- -Significance of cross validation scores can be evaluated using the +Significance of cross validation scores can be evaluated using the :func:`permutation_test_score` function. The function returns a p-value, which approximates the probability that the average cross-validation score would be obtained by chance if the target is independent of the data. - It also returns cross validation scores for each permutation of y labels. It permutes the labels of the samples and computes the p-value against the null hypothesis that the features and the labels are independent, meaning that there diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index d9b53e5d81ef7..aa87d95934ae1 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -961,8 +961,8 @@ def _index_param_value(X, v, indices): def permutation_test_score(estimator, X, y, groups=None, cv=None, n_permutations=100, n_jobs=None, random_state=0, verbose=0, scoring=None): - """Evaluates the significance of a cross-validated score by permutations. - + """Evaluates the significance of a cross-validated score by permutations. + Permutes labels and computes the p-value against the null hypothesis that the features and the labels are independent, meaning that there is no difference between the classes. @@ -971,13 +971,13 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, classifier would have had a larger error on the original data than in the randomized one. - A small p-value (under a threshold, like :math:`\alpha = 0.05`) gives + A small p-value (under a threshold, like ``0.05``) gives enough evidence to conclude that the classifier has not learned a random pattern in the data. - .. versionadded:: 0.9 + Read more in the :ref:`User Guide <cv_significance_evaluation>`. - Read more in the :ref:`User Guide `. + .. 
versionadded:: 0.9 Parameters ---------- @@ -1062,17 +1062,15 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, The best possible p-value is 1/(n_permutations + 1), the worst is 1.0. - References - ---------- - - * `"Permutation Tests for Studying Classifier Performance" - <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_ - Ojala and Garriga - The Journal of Machine Learning Research (2010) - vol. 11 - Notes ----- - This function implements "Test 1" as described in the paper given above. + This function implements "Test 1" as described in the following paper: + + * `Permutation Tests for Studying Classifier Performance + <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_, + Ojala and Garriga - The Journal of Machine Learning Research (2010) + vol. 11 + """ X, y, groups = indexable(X, y, groups)