From 3e70301a4d4eaca26f962de935477d0e118bf027 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Fri, 4 Nov 2016 11:19:04 +0100
Subject: [PATCH 01/16] DOC Add NOTE that unless random_state is set, split
 will not be identical

---
 sklearn/model_selection/_split.py | 42 +++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 642e8107e185d..04cc49e06d752 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -84,6 +84,13 @@ def split(self, X, y=None, groups=None):
 
         test : ndarray
             The testing set indices for that split.
+
+        Note
+        ----
+
+        Multiple calls to the ``split`` method will not return identical
+        training or testing sets if ``random_state`` parameter exists and is
+        not explicitly set to an integer value.
         """
         X, y, groups = indexable(X, y, groups)
         indices = np.arange(_num_samples(X))
@@ -309,6 +316,13 @@ def split(self, X, y=None, groups=None):
 
         test : ndarray
             The testing set indices for that split.
+
+        Note
+        ----
+
+        Multiple calls to the ``split`` method will not return identical
+        training or testing sets if ``random_state`` parameter exists and is
+        not explicitly set to an integer value.
         """
         X, y, groups = indexable(X, y, groups)
         n_samples = _num_samples(X)
@@ -632,6 +646,13 @@ def split(self, X, y, groups=None):
 
         test : ndarray
             The testing set indices for that split.
+
+        Note
+        ----
+
+        Multiple calls to the ``split`` method will not return identical
+        training or testing sets unless ``random_state`` is set to an integer
+        value.
         """
         return super(StratifiedKFold, self).split(X, y, groups)
 
@@ -709,6 +730,13 @@ def split(self, X, y=None, groups=None):
 
         test : ndarray
             The testing set indices for that split.
+
+        Note
+        ----
+
+        Multiple calls to the ``split`` method will not return identical
+        training or testing sets unless ``random_state`` is set to an integer
+        value.
         """
         X, y, groups = indexable(X, y, groups)
         n_samples = _num_samples(X)
@@ -938,6 +966,13 @@ def split(self, X, y=None, groups=None):
 
         test : ndarray
             The testing set indices for that split.
+
+        Note
+        ----
+
+        Multiple calls to the ``split`` method will not return identical
+        training or testing sets unless ``random_state`` is set to an integer
+        value.
         """
         X, y, groups = indexable(X, y, groups)
         for train, test in self._iter_indices(X, y, groups):
@@ -1304,6 +1339,13 @@ def split(self, X, y, groups=None):
 
         test : ndarray
             The testing set indices for that split.
+
+        Note
+        ----
+
+        Multiple calls to the ``split`` method will not return identical
+        training or testing sets unless ``random_state`` is set to an integer
+        value.
         """
         return super(StratifiedShuffleSplit, self).split(X, y, groups)
 

From 791766f92d27b02d5f5f4db6184cb7eceede37ba Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Fri, 4 Nov 2016 11:39:52 +0100
Subject: [PATCH 02/16] TST use np.testing.assert_equal for nested lists/arrays

---
 sklearn/model_selection/tests/test_split.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index 936abf03ac055..ff620b5088ae8 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -880,10 +880,15 @@ def test_cv_iterable_wrapper():
     # results
     kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y)
     kf_randomized_iter_wrapped = check_cv(kf_randomized_iter)
-    assert_array_equal(list(kf_randomized_iter_wrapped.split(X, y)),
-                       list(kf_randomized_iter_wrapped.split(X, y)))
-    assert_true(np.any(np.array(list(kf_iter_wrapped.split(X, y))) !=
-                       np.array(list(kf_randomized_iter_wrapped.split(X, y)))))
+    np.testing.assert_array_equal(
+        list(kf_randomized_iter_wrapped.split(X, y)),
+        list(kf_randomized_iter_wrapped.split(X, y)))
+    try:
+        np.testing.assert_equal(
+            np.array(list(kf_iter_wrapped.split(X, y))),
+            np.array(list(kf_randomized_iter_wrapped.split(X, y))))
+    except AssertionError:
+        pass
 
 
 def test_group_kfold():

From 5b226fef8c3f1770cbd8905ca0b231b765956af2 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Fri, 4 Nov 2016 11:40:12 +0100
Subject: [PATCH 03/16] TST Make sure cv param can be a generator

---
 sklearn/model_selection/tests/test_search.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 1ce28755075a4..f733be29d2282 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1175,12 +1175,30 @@ def test_grid_search_cv_splits_consistency():
                        cv=KFold(n_splits=n_splits))
     gs2.fit(X, y)
 
+    # Give generator as a cv parameter
+    gs3 = GridSearchCV(LinearSVC(random_state=0),
+                       param_grid={'C': [0.1, 0.2, 0.3]},
+                       cv=KFold(n_splits=n_splits, shuffle=True,
+                                random_state=0).split(X, y))
+    gs3.fit(X, y)
+
+    gs4 = GridSearchCV(LinearSVC(random_state=0),
+                       param_grid={'C': [0.1, 0.2, 0.3]},
+                       cv=KFold(n_splits=n_splits, shuffle=True,
+                                random_state=0))
+    gs4.fit(X, y)
+
+
     def _pop_time_keys(cv_results):
         for key in ('mean_fit_time', 'std_fit_time',
                     'mean_score_time', 'std_score_time'):
             cv_results.pop(key)
         return cv_results
 
+    # Check if generators as supported as cv and that the splits are consistent
+    np.testing.assert_equal(_pop_time_keys(gs3.cv_results_),
+                            _pop_time_keys(gs4.cv_results_))
+
     # OneTimeSplitter is a non-re-entrant cv where split can be called only
     # once if ``cv.split`` is called once per param setting in GridSearchCV.fit
     # the 2nd and 3rd parameter will not be evaluated as no train/test indices

From 4f188250b3d2bcd4d9855032bf85f9a0df1adf6c Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Fri, 4 Nov 2016 18:02:49 +0100
Subject: [PATCH 04/16] DOC rank_ becomes a link when rendered

---
 sklearn/model_selection/_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index d2f5542ebd32f..2be5c9abfdcd6 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -816,7 +816,7 @@ class GridSearchCV(BaseSearchCV):
         For instance the below given table
 
         +------------+-----------+------------+-----------------+---+---------+
-        |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_....|
+        |param_kernel|param_gamma|param_degree|split0_test_score|...| rank... |
         +============+===========+============+=================+===+=========+
         |  'poly'    |     --    |      2     |        0.8      |...|    2    |
         +------------+-----------+------------+-----------------+---+---------+

From 9f483828f8c1ebd1baed88ce4f5b1b3589ab6dea Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Sun, 6 Nov 2016 21:34:41 +0100
Subject: [PATCH 05/16] Use test_...

---
 sklearn/model_selection/_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 2be5c9abfdcd6..8a78d14ceb774 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -816,7 +816,7 @@ class GridSearchCV(BaseSearchCV):
         For instance the below given table
 
         +------------+-----------+------------+-----------------+---+---------+
-        |param_kernel|param_gamma|param_degree|split0_test_score|...| rank... |
+        |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|
         +============+===========+============+=================+===+=========+
         |  'poly'    |     --    |      2     |        0.8      |...|    2    |
         +------------+-----------+------------+-----------------+---+---------+

From 29eef94b8e1a4e245544d4678364926dc9280a1e Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Sun, 6 Nov 2016 21:35:07 +0100
Subject: [PATCH 06/16] Remove blank line; Add if shuffle is True

---
 sklearn/model_selection/_split.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 04cc49e06d752..489056b2e089d 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -87,7 +87,6 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-
         Multiple calls to the ``split`` method will not return identical
         training or testing sets if ``random_state`` parameter exists and is
         not explicitly set to an integer value.
@@ -319,7 +318,6 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-
         Multiple calls to the ``split`` method will not return identical
         training or testing sets if ``random_state`` parameter exists and is
         not explicitly set to an integer value.
@@ -649,10 +647,9 @@ def split(self, X, y, groups=None):
 
         Note
         ----
-
         Multiple calls to the ``split`` method will not return identical
         training or testing sets unless ``random_state`` is set to an integer
-        value.
+        value, if ``shuffle=True``.
         """
         return super(StratifiedKFold, self).split(X, y, groups)
 
@@ -733,10 +730,9 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-
         Multiple calls to the ``split`` method will not return identical
         training or testing sets unless ``random_state`` is set to an integer
-        value.
+        value, if ``shuffle=True``.
         """
         X, y, groups = indexable(X, y, groups)
         n_samples = _num_samples(X)
@@ -969,7 +965,6 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-
         Multiple calls to the ``split`` method will not return identical
         training or testing sets unless ``random_state`` is set to an integer
         value.
@@ -1342,7 +1337,6 @@ def split(self, X, y, groups=None):
 
         Note
         ----
-
         Multiple calls to the ``split`` method will not return identical
         training or testing sets unless ``random_state`` is set to an integer
         value.

From cb5ff5d62e28458423e6f76f579bad9f7560049d Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Mon, 7 Nov 2016 13:47:10 +0100
Subject: [PATCH 07/16] Fix tests

---
 sklearn/model_selection/tests/test_split.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index ff620b5088ae8..b9391a0261ad6 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -889,6 +889,10 @@ def test_cv_iterable_wrapper():
             np.array(list(kf_randomized_iter_wrapped.split(X, y))))
     except AssertionError:
         pass
+    else:
+        raise AssertionError("The splits for data are same for randomized "
+                             "and non-randomized versions of kfold iter "
+                             "wrapped by _CVIterableWrapper")
 
 
 def test_group_kfold():

From 1c6d169e214be5ce8c99232cac94f69b9fe891e1 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Mon, 7 Nov 2016 13:51:54 +0100
Subject: [PATCH 08/16] Explicitly test for GeneratorType

---
 sklearn/model_selection/tests/test_search.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index f733be29d2282..e2fb9e6e8ab0e 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -7,6 +7,7 @@
 from itertools import chain, product
 import pickle
 import sys
+from types import GeneratorType
 
 import numpy as np
 import scipy.sparse as sp
@@ -1175,6 +1176,10 @@ def test_grid_search_cv_splits_consistency():
                        cv=KFold(n_splits=n_splits))
     gs2.fit(X, y)
 
+    assert_true(isinstance(KFold(n_splits=n_splits,
+                                 shuffle=True, random_state=0).split(X, y),
+                           GeneratorType))
+
     # Give generator as a cv parameter
     gs3 = GridSearchCV(LinearSVC(random_state=0),
                        param_grid={'C': [0.1, 0.2, 0.3]},

From d355dcab8421635e2581ad2ab7c6125cca61c685 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Thu, 10 Nov 2016 13:24:25 +0100
Subject: [PATCH 09/16] TST Add the else clause

---
 sklearn/model_selection/tests/test_search.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index e2fb9e6e8ab0e..de12731aff214 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -858,11 +858,15 @@ def test_search_cv_results_rank_tie_breaking():
                                 cv_results['mean_test_score'][2])
         except AssertionError:
             pass
+        else:
+            raise AssertionError("The values are not different.")
         try:
             assert_almost_equal(cv_results['mean_train_score'][1],
                                 cv_results['mean_train_score'][2])
         except AssertionError:
             pass
+        else:
+            raise AssertionError("The values are not different.")
         # 'min' rank should be assigned to the tied candidates
         assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3])
 

From dff2f5a4cc3a985c87b98ba9a78aa9a0b70641dc Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Thu, 10 Nov 2016 13:28:03 +0100
Subject: [PATCH 10/16] TST Add comment on usage of
 np.testing.assert_array_equal

---
 sklearn/model_selection/tests/test_split.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index b9391a0261ad6..f2c3b3f27ab3f 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -356,9 +356,11 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility():
 
     for cv in (kf, skf):
         for data in zip((X, X2), (y, y2)):
+            # Test if the two splits are different
+            # numpy's assert_array_equal properly compares nested lists
             try:
-                np.testing.assert_equal(list(cv.split(*data)),
-                                        list(cv.split(*data)))
+                np.testing.assert_array_equal(list(cv.split(*data)),
+                                              list(cv.split(*data)))
             except AssertionError:
                 pass
             else:
@@ -880,6 +882,7 @@ def test_cv_iterable_wrapper():
     # results
     kf_randomized_iter = KFold(n_splits=5, shuffle=True).split(X, y)
     kf_randomized_iter_wrapped = check_cv(kf_randomized_iter)
+    # numpy's assert_array_equal properly compares nested lists
     np.testing.assert_array_equal(
         list(kf_randomized_iter_wrapped.split(X, y)),
         list(kf_randomized_iter_wrapped.split(X, y)))

From af8107d16b0cb223acd6477ac7d3b57146f7dff3 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Thu, 29 Jun 2017 18:41:29 +0200
Subject: [PATCH 11/16] TYPO

---
 sklearn/model_selection/tests/test_search.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index ab2837a319cef..52e4af3183288 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1301,7 +1301,8 @@ def _pop_time_keys(cv_results):
             cv_results.pop(key)
         return cv_results
 
-    # Check if generators as supported as cv and that the splits are consistent
+    # Check if generators are supported as cv and
+    # that the splits are consistent
     np.testing.assert_equal(_pop_time_keys(gs3.cv_results_),
                             _pop_time_keys(gs4.cv_results_))
 

From a2fcd332c801516abc87aa4104d7bbdc641b832b Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Thu, 29 Jun 2017 19:13:54 +0200
Subject: [PATCH 12/16] MNT Remove if ;

---
 sklearn/model_selection/_split.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 1c48ea1fd5ffc..9bbb331e5cd96 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -579,10 +579,7 @@ def __init__(self, n_splits=3, shuffle=False, random_state=None):
         super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state)
 
     def _make_test_folds(self, X, y=None, groups=None):
-        if self.shuffle:
-            rng = check_random_state(self.random_state)
-        else:
-            rng = self.random_state
+        rng = self.random_state
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_y, y_inversed = np.unique(y, return_inverse=True)

From f02d50e82d77e656b36fa25c10ff57cb251a062a Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Wed, 12 Jul 2017 11:06:20 -0500
Subject: [PATCH 13/16] Address Joel's comments

---
 doc/modules/cross_validation.rst             |  4 +++
 sklearn/model_selection/_split.py            | 36 ++++++++++----------
 sklearn/model_selection/tests/test_search.py | 18 +++-------
 3 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index ab7d2227447b1..69ca6e1edc8c1 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -728,6 +728,10 @@ to shuffle the data indices before splitting them. Note that:
 * To ensure results are repeatable (*on the same platform*), use a fixed value
   for ``random_state``.
 
+The randomized CV splitters may return different results for each call of
+split. This can be avoided (and identical results returned for each split) by
+setting ``random_state`` to an integer.
+
 Cross validation and model selection
 ====================================
 
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 973c0b79b061e..0122f01059369 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -86,9 +86,9 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-        Multiple calls to the ``split`` method will not return identical
-        training or testing sets if ``random_state`` parameter exists and is
-        not explicitly set to an integer value.
+        Randomized CV splitters may return different results for each call of
+        split. This can be avoided (and identical results returned for each
+        split) by setting ``random_state`` to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         indices = np.arange(_num_samples(X))
@@ -317,9 +317,9 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-        Multiple calls to the ``split`` method will not return identical
-        training or testing sets if ``random_state`` parameter exists and is
-        not explicitly set to an integer value.
+        Randomized CV splitters may return different results for each call of
+        split. This can be avoided (and identical results returned for each
+        split) by setting ``random_state`` to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         n_samples = _num_samples(X)
@@ -657,9 +657,9 @@ def split(self, X, y, groups=None):
 
         Note
         ----
-        Multiple calls to the ``split`` method will not return identical
-        training or testing sets unless ``random_state`` is set to an integer
-        value, if ``shuffle=True``.
+        Randomized CV splitters may return different results for each call of
+        split. This can be avoided (and identical results returned for each
+        split) by setting random_state to an integer.
         """
         y = check_array(y, ensure_2d=False, dtype=None)
         return super(StratifiedKFold, self).split(X, y, groups)
@@ -744,9 +744,9 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-        Multiple calls to the ``split`` method will not return identical
-        training or testing sets unless ``random_state`` is set to an integer
-        value, if ``shuffle=True``.
+        Randomized CV splitters may return different results for each call of
+        split. This can be avoided (and identical results returned for each
+        split) by setting random_state to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         n_samples = _num_samples(X)
@@ -1188,9 +1188,9 @@ def split(self, X, y=None, groups=None):
 
         Note
         ----
-        Multiple calls to the ``split`` method will not return identical
-        training or testing sets unless ``random_state`` is set to an integer
-        value.
+        Randomized CV splitters may return different results for each call of
+        split. This can be avoided (and identical results returned for each
+        split) by setting random_state to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         for train, test in self._iter_indices(X, y, groups):
@@ -1608,9 +1608,9 @@ def split(self, X, y, groups=None):
 
         Note
         ----
-        Multiple calls to the ``split`` method will not return identical
-        training or testing sets unless ``random_state`` is set to an integer
-        value.
+        Randomized CV splitters may return different results for each call of
+        split. This can be avoided (and identical results returned for each
+        split) by setting random_state to an integer.
         """
         y = check_array(y, ensure_2d=False, dtype=None)
         return super(StratifiedShuffleSplit, self).split(X, y, groups)
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 8065a4e8f14df..058269c0d2c0b 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1071,20 +1071,10 @@ def test_search_cv_results_rank_tie_breaking():
                             cv_results['mean_test_score'][1])
         assert_almost_equal(cv_results['mean_train_score'][0],
                             cv_results['mean_train_score'][1])
-        try:
-            assert_almost_equal(cv_results['mean_test_score'][1],
-                                cv_results['mean_test_score'][2])
-        except AssertionError:
-            pass
-        else:
-            raise AssertionError("The values are not different.")
-        try:
-            assert_almost_equal(cv_results['mean_train_score'][1],
-                                cv_results['mean_train_score'][2])
-        except AssertionError:
-            pass
-        else:
-            raise AssertionError("The values are not different.")
+        assert_false(np.allclose(cv_results['mean_test_score'][1],
+                                 cv_results['mean_test_score'][2]))
+        assert_false(np.allclose(cv_results['mean_train_score'][1],
+                                 cv_results['mean_train_score'][2]))
         # 'min' rank should be assigned to the tied candidates
         assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3])
 

From 8a913597cdb05b6ca5332ecad898de5c38715dc5 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Sat, 15 Jul 2017 10:16:03 -0500
Subject: [PATCH 14/16] merge the identical points in doc

---
 doc/modules/cross_validation.rst | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst
index 69ca6e1edc8c1..a3064c3c9f6f6 100644
--- a/doc/modules/cross_validation.rst
+++ b/doc/modules/cross_validation.rst
@@ -725,12 +725,7 @@ to shuffle the data indices before splitting them. Note that:
   shuffling will be different every time ``KFold(..., shuffle=True)`` is
   iterated. However, ``GridSearchCV`` will use the same shuffling for each set
   of parameters validated by a single call to its ``fit`` method.
-* To ensure results are repeatable (*on the same platform*), use a fixed value
-  for ``random_state``.
-
-The randomized CV splitters may return different results for each call of
-split. This can be avoided (and identical results returned for each split) by
-setting ``random_state`` to an integer.
+* To get identical results for each split, set ``random_state`` to an integer.
 
 Cross validation and model selection
 ====================================

From a2346974fdc3b4c4fde8e2b30b7dee258febad36 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Sat, 15 Jul 2017 12:02:14 -0500
Subject: [PATCH 15/16] DOC address Andy's comments

---
 sklearn/model_selection/_split.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 0122f01059369..386d439184117 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -87,8 +87,8 @@ def split(self, X, y=None, groups=None):
         Note
         ----
         Randomized CV splitters may return different results for each call of
-        split. This can be avoided (and identical results returned for each
-        split) by setting ``random_state`` to an integer.
+        split. You can make the results identical by setting ``random_state``
+        to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         indices = np.arange(_num_samples(X))
@@ -318,8 +318,8 @@ def split(self, X, y=None, groups=None):
         Note
         ----
         Randomized CV splitters may return different results for each call of
-        split. This can be avoided (and identical results returned for each
-        split) by setting ``random_state`` to an integer.
+        split. You can make the results identical by setting ``random_state``
+        to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         n_samples = _num_samples(X)
@@ -658,8 +658,8 @@ def split(self, X, y, groups=None):
         Note
         ----
         Randomized CV splitters may return different results for each call of
-        split. This can be avoided (and identical results returned for each
-        split) by setting random_state to an integer.
+        split. You can make the results identical by setting ``random_state``
+        to an integer.
         """
         y = check_array(y, ensure_2d=False, dtype=None)
         return super(StratifiedKFold, self).split(X, y, groups)
@@ -745,8 +745,8 @@ def split(self, X, y=None, groups=None):
         Note
         ----
         Randomized CV splitters may return different results for each call of
-        split. This can be avoided (and identical results returned for each
-        split) by setting random_state to an integer.
+        split. You can make the results identical by setting ``random_state``
+        to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         n_samples = _num_samples(X)
@@ -1189,8 +1189,8 @@ def split(self, X, y=None, groups=None):
         Note
         ----
         Randomized CV splitters may return different results for each call of
-        split. This can be avoided (and identical results returned for each
-        split) by setting random_state to an integer.
+        split. You can make the results identical by setting ``random_state``
+        to an integer.
         """
         X, y, groups = indexable(X, y, groups)
         for train, test in self._iter_indices(X, y, groups):
@@ -1609,8 +1609,8 @@ def split(self, X, y, groups=None):
         Note
         ----
         Randomized CV splitters may return different results for each call of
-        split. This can be avoided (and identical results returned for each
-        split) by setting random_state to an integer.
+        split. You can make the results identical by setting ``random_state``
+        to an integer.
         """
         y = check_array(y, ensure_2d=False, dtype=None)
         return super(StratifiedShuffleSplit, self).split(X, y, groups)

From b4c633f1d1c7a1d935541c6dfb07bcf853ff33da Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Sun, 16 Jul 2017 12:02:56 -0500
Subject: [PATCH 16/16] Move comment to before the check for generator type

---
 sklearn/model_selection/tests/test_search.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 058269c0d2c0b..5e667727d9dda 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1416,11 +1416,10 @@ def test_grid_search_cv_splits_consistency():
                        cv=KFold(n_splits=n_splits))
     gs2.fit(X, y)
 
+    # Give generator as a cv parameter
     assert_true(isinstance(KFold(n_splits=n_splits,
                                  shuffle=True, random_state=0).split(X, y),
                            GeneratorType))
-
-    # Give generator as a cv parameter
     gs3 = GridSearchCV(LinearSVC(random_state=0),
                        param_grid={'C': [0.1, 0.2, 0.3]},
                        cv=KFold(n_splits=n_splits, shuffle=True,