diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index e42d7f8514438..746c3f43d13f5 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -300,6 +300,23 @@ section. * :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, * :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. + +.. _cv_significance_evaluation: + +Cross-validation significance evaluation +---------------------------------------- + +Significance of cross validation scores can be evaluated using the +:func:`permutation_test_score` function. The function returns a p-value, which +approximates the probability that the average cross-validation score would be +obtained by chance if the target is independent of the data. + +It also returns cross validation scores for each permutation of y labels. It +permutes the labels of the samples and computes the p-value against the null +hypothesis that the features and the labels are independent, meaning that there +is no difference between the classes. + + Cross validation iterators ========================== diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 95669a212c8ef..aa87d95934ae1 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -961,9 +961,23 @@ def _index_param_value(X, v, indices): def permutation_test_score(estimator, X, y, groups=None, cv=None, n_permutations=100, n_jobs=None, random_state=0, verbose=0, scoring=None): - """Evaluate the significance of a cross-validated score with permutations + """Evaluates the significance of a cross-validated score by permutations. - Read more in the :ref:`User Guide <cross_validation>`. + Permutes labels and computes the p-value against the null + hypothesis that the features and the labels are independent, meaning that + there is no difference between the classes. 
+ + The p-value represents the fraction of randomized data sets where the + classifier would have had a larger error on the original data + than in the randomized one. + + A small p-value (under a threshold, like ``0.05``) gives + enough evidence to conclude that the classifier has not learned a random + pattern in the data. + + Read more in the :ref:`User Guide <cv_significance_evaluation>`. + + .. versionadded:: 0.9 Parameters ---------- @@ -1050,11 +1064,12 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None, Notes ----- - This function implements Test 1 in: + This function implements "Test 1" as described in the following paper: - Ojala and Garriga. Permutation Tests for Studying Classifier - Performance. The Journal of Machine Learning Research (2010) - vol. 11 + * `Permutation Tests for Studying Classifier Performance + <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_, + Ojala and Garriga - The Journal of Machine Learning Research (2010) + vol. 11 """ X, y, groups = indexable(X, y, groups)