diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index e42d7f8514438..746c3f43d13f5 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -300,6 +300,23 @@ section.
     * :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`,
     * :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`.
 
+
+.. _cv_significance_evaluation:
+
+Cross-validation significance evaluation
+----------------------------------------
+
+The significance of cross-validation scores can be evaluated using the
+:func:`permutation_test_score` function. The function returns a p-value, which
+approximates the probability that an average cross-validation score at least
+as good would be obtained by chance if the target is independent of the data.
+
+It also returns the cross-validation scores obtained for each permutation of
+the labels ``y``. The labels of the samples are permuted and the p-value is
+computed against the null hypothesis that the features and the labels are
+independent, meaning that there is no difference between the classes.
+
+
 Cross validation iterators
 ==========================
 
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py
index 95669a212c8ef..aa87d95934ae1 100644
--- a/sklearn/model_selection/_validation.py
+++ b/sklearn/model_selection/_validation.py
@@ -961,9 +961,23 @@ def _index_param_value(X, v, indices):
 def permutation_test_score(estimator, X, y, groups=None, cv=None,
                            n_permutations=100, n_jobs=None, random_state=0,
                            verbose=0, scoring=None):
-    """Evaluate the significance of a cross-validated score with permutations
+    """Evaluates the significance of a cross-validated score by permutations.
 
-    Read more in the :ref:`User Guide <cross_validation>`.
+    Permutes labels and computes the p-value against the null
+    hypothesis that the features and the labels are independent, meaning that
+    there is no difference between the classes.
+
+    The p-value represents the fraction of randomized data sets where the
+    estimator performed as well or better than on the original data, i.e.
+    where permuting the labels did not degrade the cross-validated score.
+
+    A small p-value (under a threshold, like ``0.05``) gives
+    enough evidence to reject the null hypothesis, i.e. to conclude that the
+    classifier has learned a real dependency between features and labels.
+
+    Read more in the :ref:`User Guide <cv_significance_evaluation>`.
+
+    .. versionadded:: 0.9
 
     Parameters
     ----------
@@ -1050,11 +1064,12 @@ def permutation_test_score(estimator, X, y, groups=None, cv=None,
 
     Notes
     -----
-    This function implements Test 1 in:
+    This function implements "Test 1" as described in the following paper:
 
-        Ojala and Garriga. Permutation Tests for Studying Classifier
-        Performance.  The Journal of Machine Learning Research (2010)
-        vol. 11
+    * `Permutation Tests for Studying Classifier Performance
+      <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_,
+      Ojala and Garriga - The Journal of Machine Learning Research (2010)
+      vol. 11
 
     """
     X, y, groups = indexable(X, y, groups)