From 5dd260c9d1e22bf24dfde2e44e18573ec8f5bed7 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Thu, 24 Nov 2016 16:26:22 +0100
Subject: [PATCH 01/10] Set a random random_state at init to ensure
 deterministic randomness

---
 sklearn/model_selection/_split.py           | 28 +++++++++++++++++----
 sklearn/model_selection/tests/test_split.py | 21 +++++++++++-----
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index b2ed060e31717..fb28191ff1945 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -284,7 +284,16 @@ def __init__(self, n_splits, shuffle, random_state):
 
         self.n_splits = n_splits
         self.shuffle = shuffle
+        # For repr
         self.random_state = random_state
+        if random_state is None:
+            # This is done to ensure that the multiple calls to split
+            # are random for each initialization of splitter but consistent
+            # across multiple calls for the same initialization.
+            self._random_state = check_random_state(
+                random_state).randint(99999999)
+        else:
+            self._random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
@@ -407,7 +416,7 @@ def _iter_test_indices(self, X, y=None, groups=None):
         n_samples = _num_samples(X)
         indices = np.arange(n_samples)
         if self.shuffle:
-            check_random_state(self.random_state).shuffle(indices)
+            check_random_state(self._random_state).shuffle(indices)
 
         n_splits = self.n_splits
         fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int)
@@ -560,9 +569,9 @@ def __init__(self, n_splits=3, shuffle=False, random_state=None):
 
     def _make_test_folds(self, X, y=None, groups=None):
         if self.shuffle:
-            rng = check_random_state(self.random_state)
+            rng = check_random_state(self._random_state)
         else:
-            rng = self.random_state
+            rng = self._random_state
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_y, y_inversed = np.unique(y, return_inverse=True)
@@ -922,7 +931,16 @@ def __init__(self, n_splits=10, test_size=0.1, train_size=None,
         self.n_splits = n_splits
         self.test_size = test_size
         self.train_size = train_size
+        # For repr
         self.random_state = random_state
+        if random_state is None:
+            # This is done to ensure that the multiple calls to split
+            # are random for each initialization of splitter but consistent
+            # across multiple calls for the same initialization.
+            self._random_state = check_random_state(
+                random_state).randint(99999999)
+        else:
+            self._random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
@@ -1042,7 +1060,7 @@ def _iter_indices(self, X, y=None, groups=None):
         n_samples = _num_samples(X)
         n_train, n_test = _validate_shuffle_split(n_samples, self.test_size,
                                                   self.train_size)
-        rng = check_random_state(self.random_state)
+        rng = check_random_state(self._random_state)
         for i in range(self.n_splits):
             # random partition
             permutation = rng.permutation(n_samples)
@@ -1269,7 +1287,7 @@ def _iter_indices(self, X, y, groups=None):
                              'equal to the number of classes = %d' %
                              (n_test, n_classes))
 
-        rng = check_random_state(self.random_state)
+        rng = check_random_state(self._random_state)
 
         for _ in range(self.n_splits):
             # if there are ties in the class-counts, we want
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index fba323492be85..b0ad88ef1b8de 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -430,19 +430,28 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility():
         np.testing.assert_equal(list(cv.split(X, y)), list(cv.split(X, y)))
         np.testing.assert_equal(list(cv.split(X2, y2)), list(cv.split(X2, y2)))
 
+    # Tests to ensure consistent behaviour even when random_state is not set.
     kf = KFold(3, shuffle=True)
     skf = StratifiedKFold(3, shuffle=True)
-
-    for cv in (kf, skf):
+    kf1 = KFold(3, shuffle=True)
+    kf2 = KFold(3, shuffle=True)
+    skf1 = StratifiedKFold(3, shuffle=True)
+    skf2 = StratifiedKFold(3, shuffle=True)
+    for cvs in ((kf, kf1, kf2), (skf, skf1, skf2)):
         for data in zip((X, X2), (y, y2)):
+            # For the same initialilzation, splits should be same across
+            # multiple split calls, even when random_state is not set.
+            np.testing.assert_equal(list(cvs[0].split(*data)),
+                                    list(cvs[0].split(*data)))
+
             try:
-                np.testing.assert_equal(list(cv.split(*data)),
-                                        list(cv.split(*data)))
+                np.testing.assert_equal(list(cvs[1].split(*data)),
+                                        list(cvs[2].split(*data)))
             except AssertionError:
                 pass
             else:
-                raise AssertionError("The splits for data, %s, are same even "
-                                     "when random state is not set" % data)
+                raise AssertionError("When random_state is not set, the splits"
+                                     " are same for different initializations")
 
 
 def test_shuffle_stratifiedkfold():

From 75448b9697dcfcb79ac939f0213cb10af5a1bb96 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Mon, 28 Nov 2016 12:47:46 +0100
Subject: [PATCH 02/10] COSMIT

---
 sklearn/model_selection/_split.py           | 2 +-
 sklearn/model_selection/tests/test_split.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index fb28191ff1945..944ae552385b1 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -291,7 +291,7 @@ def __init__(self, n_splits, shuffle, random_state):
             # are random for each initialization of splitter but consistent
             # across multiple calls for the same initialization.
             self._random_state = check_random_state(
-                random_state).randint(99999999)
+                random_state).randint(np.iinfo(np.int32).max)
         else:
             self._random_state = random_state
 
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index b0ad88ef1b8de..30213551fd73f 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -439,11 +439,13 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility():
     skf2 = StratifiedKFold(3, shuffle=True)
     for cvs in ((kf, kf1, kf2), (skf, skf1, skf2)):
         for data in zip((X, X2), (y, y2)):
-            # For the same initialilzation, splits should be same across
+            # For the same initialization, splits should be same across
             # multiple split calls, even when random_state is not set.
             np.testing.assert_equal(list(cvs[0].split(*data)),
                                     list(cvs[0].split(*data)))
 
+            # For different initialisations, splits should not be same when
+            # random_state is not set.
             try:
                 np.testing.assert_equal(list(cvs[1].split(*data)),
                                         list(cvs[2].split(*data)))

From 6f5947d8f5d631d572372f60ac784aa1d8300460 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Mon, 28 Nov 2016 13:22:27 +0100
Subject: [PATCH 03/10] Use np.iinfo(...).max instead of 99999..

---
 sklearn/model_selection/_split.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 944ae552385b1..855fdb1756785 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -938,7 +938,7 @@ def __init__(self, n_splits=10, test_size=0.1, train_size=None,
             # are random for each initialization of splitter but consistent
             # across multiple calls for the same initialization.
             self._random_state = check_random_state(
-                random_state).randint(99999999)
+                random_state).randint(np.iinfo(np.int32).max)
         else:
             self._random_state = random_state
 

From d05efd43afcc6b59d8bd8a61947fdb39d60f0e73 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Fri, 2 Dec 2016 16:07:55 +0100
Subject: [PATCH 04/10] Handle np.random.RandomState instances; Set expl. state
 for None in repr

---
 sklearn/model_selection/_split.py           | 27 ++++++++-------------
 sklearn/model_selection/tests/test_split.py | 13 ++++++----
 2 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 855fdb1756785..9b39a9fcb2571 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -284,16 +284,14 @@ def __init__(self, n_splits, shuffle, random_state):
 
         self.n_splits = n_splits
         self.shuffle = shuffle
-        # For repr
-        self.random_state = random_state
-        if random_state is None:
+        if not isinstance(random_state, (np.integer, numbers.Integral)):
             # This is done to ensure that the multiple calls to split
             # are random for each initialization of splitter but consistent
             # across multiple calls for the same initialization.
-            self._random_state = check_random_state(
+            self.random_state = check_random_state(
                 random_state).randint(np.iinfo(np.int32).max)
         else:
-            self._random_state = random_state
+            self.random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
@@ -416,7 +414,7 @@ def _iter_test_indices(self, X, y=None, groups=None):
         n_samples = _num_samples(X)
         indices = np.arange(n_samples)
         if self.shuffle:
-            check_random_state(self._random_state).shuffle(indices)
+            check_random_state(self.random_state).shuffle(indices)
 
         n_splits = self.n_splits
         fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int)
@@ -568,10 +566,7 @@ def __init__(self, n_splits=3, shuffle=False, random_state=None):
         super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state)
 
     def _make_test_folds(self, X, y=None, groups=None):
-        if self.shuffle:
-            rng = check_random_state(self._random_state)
-        else:
-            rng = self._random_state
+        rng = check_random_state(self.random_state)
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_y, y_inversed = np.unique(y, return_inverse=True)
@@ -931,16 +926,14 @@ def __init__(self, n_splits=10, test_size=0.1, train_size=None,
         self.n_splits = n_splits
         self.test_size = test_size
         self.train_size = train_size
-        # For repr
-        self.random_state = random_state
-        if random_state is None:
+        if not isinstance(random_state, (np.integer, numbers.Integral)):
             # This is done to ensure that the multiple calls to split
             # are random for each initialization of splitter but consistent
             # across multiple calls for the same initialization.
-            self._random_state = check_random_state(
+            self.random_state = check_random_state(
                 random_state).randint(np.iinfo(np.int32).max)
         else:
-            self._random_state = random_state
+            self.random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
@@ -1060,7 +1053,7 @@ def _iter_indices(self, X, y=None, groups=None):
         n_samples = _num_samples(X)
         n_train, n_test = _validate_shuffle_split(n_samples, self.test_size,
                                                   self.train_size)
-        rng = check_random_state(self._random_state)
+        rng = check_random_state(self.random_state)
         for i in range(self.n_splits):
             # random partition
             permutation = rng.permutation(n_samples)
@@ -1287,7 +1280,7 @@ def _iter_indices(self, X, y, groups=None):
                              'equal to the number of classes = %d' %
                              (n_test, n_classes))
 
-        rng = check_random_state(self._random_state)
+        rng = check_random_state(self.random_state)
 
         for _ in range(self.n_splits):
             # if there are ties in the class-counts, we want
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index 30213551fd73f..0879aa26f6138 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -147,8 +147,8 @@ def test_cross_validator_with_default_params():
     groups = np.array([1, 2, 3, 4])
     loo = LeaveOneOut()
     lpo = LeavePOut(p)
-    kf = KFold(n_splits)
-    skf = StratifiedKFold(n_splits)
+    kf = KFold(n_splits, random_state=0)
+    skf = StratifiedKFold(n_splits, random_state=0)
     lolo = LeaveOneGroupOut()
     lopo = LeavePGroupsOut(p)
     ss = ShuffleSplit(random_state=0)
@@ -156,8 +156,8 @@ def test_cross_validator_with_default_params():
 
     loo_repr = "LeaveOneOut()"
     lpo_repr = "LeavePOut(p=2)"
-    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
-    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
+    kf_repr = "KFold(n_splits=2, random_state=0, shuffle=False)"
+    skf_repr = "StratifiedKFold(n_splits=2, random_state=0, shuffle=False)"
     lolo_repr = "LeaveOneGroupOut()"
     lopo_repr = "LeavePGroupsOut(n_groups=2)"
     ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, test_size=0.1, "
@@ -425,8 +425,11 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility():
 
     kf = KFold(3, shuffle=True, random_state=0)
     skf = StratifiedKFold(3, shuffle=True, random_state=0)
+    kf2 = KFold(3, shuffle=True, random_state=np.random.RandomState(0))
+    skf2 = StratifiedKFold(3, shuffle=True,
+                           random_state=np.random.RandomState(0))
 
-    for cv in (kf, skf):
+    for cv in (kf, skf, kf2, skf2):
         np.testing.assert_equal(list(cv.split(X, y)), list(cv.split(X, y)))
         np.testing.assert_equal(list(cv.split(X2, y2)), list(cv.split(X2, y2)))
 

From 374d2ed4322d32cd33a36bddaeede6841081f5a7 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Fri, 2 Dec 2016 17:08:20 +0100
Subject: [PATCH 05/10] Fix doctest

---
 sklearn/model_selection/_split.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 9b39a9fcb2571..a3d3dd66ae28a 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -378,11 +378,11 @@ class KFold(_BaseKFold):
     >>> from sklearn.model_selection import KFold
     >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
     >>> y = np.array([1, 2, 3, 4])
-    >>> kf = KFold(n_splits=2)
+    >>> kf = KFold(n_splits=2, random_state=0)
     >>> kf.get_n_splits(X)
     2
     >>> print(kf)  # doctest: +NORMALIZE_WHITESPACE
-    KFold(n_splits=2, random_state=None, shuffle=False)
+    KFold(n_splits=2, random_state=0, shuffle=False)
     >>> for train_index, test_index in kf.split(X):
     ...    print("TRAIN:", train_index, "TEST:", test_index)
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -543,11 +543,11 @@ class StratifiedKFold(_BaseKFold):
     >>> from sklearn.model_selection import StratifiedKFold
     >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
     >>> y = np.array([0, 0, 1, 1])
-    >>> skf = StratifiedKFold(n_splits=2)
+    >>> skf = StratifiedKFold(n_splits=2, random_state=0)
     >>> skf.get_n_splits(X, y)
     2
     >>> print(skf)  # doctest: +NORMALIZE_WHITESPACE
-    StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
+    StratifiedKFold(n_splits=2, random_state=0, shuffle=False)
     >>> for train_index, test_index in skf.split(X, y):
     ...    print("TRAIN:", train_index, "TEST:", test_index)
     ...    X_train, X_test = X[train_index], X[test_index]

From d3db61799079de21de2ebbe915817bb3bd71bf30 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Mon, 5 Dec 2016 16:08:41 +0100
Subject: [PATCH 06/10] Change order instead of negating

---
 sklearn/model_selection/_split.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index a3d3dd66ae28a..4a2d6fcaff3cc 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -284,14 +284,14 @@ def __init__(self, n_splits, shuffle, random_state):
 
         self.n_splits = n_splits
         self.shuffle = shuffle
-        if not isinstance(random_state, (np.integer, numbers.Integral)):
+        if isinstance(random_state, (np.integer, numbers.Integral)):
+            self.random_state = random_state
+        else:
             # This is done to ensure that the multiple calls to split
             # are random for each initialization of splitter but consistent
             # across multiple calls for the same initialization.
             self.random_state = check_random_state(
                 random_state).randint(np.iinfo(np.int32).max)
-        else:
-            self.random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
@@ -926,14 +926,14 @@ def __init__(self, n_splits=10, test_size=0.1, train_size=None,
         self.n_splits = n_splits
         self.test_size = test_size
         self.train_size = train_size
-        if not isinstance(random_state, (np.integer, numbers.Integral)):
+        if isinstance(random_state, (np.integer, numbers.Integral)):
+            self.random_state = random_state
+        else:
             # This is done to ensure that the multiple calls to split
             # are random for each initialization of splitter but consistent
             # across multiple calls for the same initialization.
             self.random_state = check_random_state(
                 random_state).randint(np.iinfo(np.int32).max)
-        else:
-            self.random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.

From c762363276caa65c4ba1e0fc1b3cbe3c9c8afdb9 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Mon, 5 Dec 2016 16:11:13 +0100
Subject: [PATCH 07/10] Comment clarity

---
 sklearn/model_selection/tests/test_split.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index 0879aa26f6138..d99ae3e24924d 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -429,11 +429,13 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility():
     skf2 = StratifiedKFold(3, shuffle=True,
                            random_state=np.random.RandomState(0))
 
+    # 1) Test to ensure consistent behavior when random_state is set explicitly
     for cv in (kf, skf, kf2, skf2):
+        # Check that calling split twice yields the same results
         np.testing.assert_equal(list(cv.split(X, y)), list(cv.split(X, y)))
         np.testing.assert_equal(list(cv.split(X2, y2)), list(cv.split(X2, y2)))
 
-    # Tests to ensure consistent behaviour even when random_state is not set.
+    # 2) Tests to ensure consistent behavior even when random_state is not set
     kf = KFold(3, shuffle=True)
     skf = StratifiedKFold(3, shuffle=True)
     kf1 = KFold(3, shuffle=True)

From 318312e51c9c7277a793c63c405a87e1bd8c4048 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Tue, 6 Dec 2016 16:22:23 +0100
Subject: [PATCH 08/10] Modify random_state only when shuffle is set to True

---
 sklearn/model_selection/_split.py           |  7 ++++---
 sklearn/model_selection/tests/test_split.py | 12 ++++++------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 4a2d6fcaff3cc..f0298ec4d1904 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -284,14 +284,15 @@ def __init__(self, n_splits, shuffle, random_state):
 
         self.n_splits = n_splits
         self.shuffle = shuffle
-        if isinstance(random_state, (np.integer, numbers.Integral)):
-            self.random_state = random_state
-        else:
+        if shuffle and not isinstance(random_state,
+                                      (np.integer, numbers.Integral)):
             # This is done to ensure that the multiple calls to split
             # are random for each initialization of splitter but consistent
             # across multiple calls for the same initialization.
             self.random_state = check_random_state(
                 random_state).randint(np.iinfo(np.int32).max)
+        else:
+            self.random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index d99ae3e24924d..e265a8d4ed6b4 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -147,20 +147,20 @@ def test_cross_validator_with_default_params():
     groups = np.array([1, 2, 3, 4])
     loo = LeaveOneOut()
     lpo = LeavePOut(p)
-    kf = KFold(n_splits, random_state=0)
-    skf = StratifiedKFold(n_splits, random_state=0)
+    kf = KFold(n_splits)
+    skf = StratifiedKFold(n_splits)
     lolo = LeaveOneGroupOut()
     lopo = LeavePGroupsOut(p)
-    ss = ShuffleSplit(random_state=0)
+    ss = ShuffleSplit(random_state=42)
     ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = np of unique folds = 2
 
     loo_repr = "LeaveOneOut()"
     lpo_repr = "LeavePOut(p=2)"
-    kf_repr = "KFold(n_splits=2, random_state=0, shuffle=False)"
-    skf_repr = "StratifiedKFold(n_splits=2, random_state=0, shuffle=False)"
+    kf_repr = "KFold(n_splits=2, random_state=None, shuffle=False)"
+    skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
     lolo_repr = "LeaveOneGroupOut()"
     lopo_repr = "LeavePGroupsOut(n_groups=2)"
-    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, test_size=0.1, "
+    ss_repr = ("ShuffleSplit(n_splits=10, random_state=42, test_size=0.1, "
                "train_size=None)")
     ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"
 

From a637c9932831941a6d5e1e9ec4305dce807f475b Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Tue, 6 Dec 2016 16:23:59 +0100
Subject: [PATCH 09/10] Remove random_state param from example docs

---
 sklearn/model_selection/_split.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index f0298ec4d1904..21a0cac699aa0 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -379,11 +379,11 @@ class KFold(_BaseKFold):
     >>> from sklearn.model_selection import KFold
     >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
     >>> y = np.array([1, 2, 3, 4])
-    >>> kf = KFold(n_splits=2, random_state=0)
+    >>> kf = KFold(n_splits=2)
     >>> kf.get_n_splits(X)
     2
     >>> print(kf)  # doctest: +NORMALIZE_WHITESPACE
-    KFold(n_splits=2, random_state=0, shuffle=False)
+    KFold(n_splits=2, random_state=None, shuffle=False)
     >>> for train_index, test_index in kf.split(X):
     ...    print("TRAIN:", train_index, "TEST:", test_index)
     ...    X_train, X_test = X[train_index], X[test_index]
@@ -544,11 +544,11 @@ class StratifiedKFold(_BaseKFold):
     >>> from sklearn.model_selection import StratifiedKFold
     >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
     >>> y = np.array([0, 0, 1, 1])
-    >>> skf = StratifiedKFold(n_splits=2, random_state=0)
+    >>> skf = StratifiedKFold(n_splits=2)
     >>> skf.get_n_splits(X, y)
     2
     >>> print(skf)  # doctest: +NORMALIZE_WHITESPACE
-    StratifiedKFold(n_splits=2, random_state=0, shuffle=False)
+    StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
     >>> for train_index, test_index in skf.split(X, y):
     ...    print("TRAIN:", train_index, "TEST:", test_index)
     ...    X_train, X_test = X[train_index], X[test_index]

From bc9f22c20f6ae90a480767659ec3651fef056002 Mon Sep 17 00:00:00 2001
From: Raghav RV <rvraghav93@gmail.com>
Date: Tue, 6 Dec 2016 16:35:19 +0100
Subject: [PATCH 10/10] Simplify tests

---
 sklearn/model_selection/tests/test_split.py | 41 +++++++++++----------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index e265a8d4ed6b4..4fee7ef090de4 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -423,37 +423,38 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility():
     X2 = np.ones(16)  # Not divisible by 3
     y2 = [0] * 8 + [1] * 8
 
+    # random_state set to int with shuffle=True
     kf = KFold(3, shuffle=True, random_state=0)
     skf = StratifiedKFold(3, shuffle=True, random_state=0)
+    # random_state set to RandomState object with shuffle=True
     kf2 = KFold(3, shuffle=True, random_state=np.random.RandomState(0))
     skf2 = StratifiedKFold(3, shuffle=True,
                            random_state=np.random.RandomState(0))
-
-    # 1) Test to ensure consistent behavior when random_state is set explicitly
-    for cv in (kf, skf, kf2, skf2):
-        # Check that calling split twice yields the same results
-        np.testing.assert_equal(list(cv.split(X, y)), list(cv.split(X, y)))
-        np.testing.assert_equal(list(cv.split(X2, y2)), list(cv.split(X2, y2)))
-
-    # 2) Tests to ensure consistent behavior even when random_state is not set
-    kf = KFold(3, shuffle=True)
-    skf = StratifiedKFold(3, shuffle=True)
+    # random_state not set with shuffle=True
+    kf3 = KFold(3, shuffle=True)
+    skf3 = StratifiedKFold(3, shuffle=True)
+
+    # 1) Test to ensure consistent behavior for multiple split calls
+    #    irrespective of random_state
+    for cv in (kf, skf, kf2, skf2, kf3, skf3):
+        for data in ((X, y), (X2, y2)):
+            # Check that calling split twice yields the same results
+            np.testing.assert_equal(list(cv.split(*data)),
+                                    list(cv.split(*data)))
+
+    # 2) Tests to ensure different initializations produce different splits,
+    #    when random_state is not set
     kf1 = KFold(3, shuffle=True)
-    kf2 = KFold(3, shuffle=True)
     skf1 = StratifiedKFold(3, shuffle=True)
+    kf2 = KFold(3, shuffle=True)
     skf2 = StratifiedKFold(3, shuffle=True)
-    for cvs in ((kf, kf1, kf2), (skf, skf1, skf2)):
-        for data in zip((X, X2), (y, y2)):
-            # For the same initialization, splits should be same across
-            # multiple split calls, even when random_state is not set.
-            np.testing.assert_equal(list(cvs[0].split(*data)),
-                                    list(cvs[0].split(*data)))
-
+    for cv1, cv2 in ((kf1, kf2), (skf1, skf2)):
+        for data in ((X, y), (X2, y2)):
             # For different initialisations, splits should not be same when
             # random_state is not set.
             try:
-                np.testing.assert_equal(list(cvs[1].split(*data)),
-                                        list(cvs[2].split(*data)))
+                np.testing.assert_equal(list(cv1.split(*data)),
+                                        list(cv2.split(*data)))
             except AssertionError:
                 pass
             else: