
[MRG + 1] Set a random random_state at init to ensure deterministic randomness when random_state is None #7935

Closed
sklearn/model_selection/_split.py: 24 changes (18 additions, 6 deletions)

@@ -284,7 +284,15 @@ def __init__(self, n_splits, shuffle, random_state):

         self.n_splits = n_splits
         self.shuffle = shuffle
-        self.random_state = random_state
+        if shuffle and not isinstance(random_state,
+                                      (np.integer, numbers.Integral)):
+            # This is done to ensure that the multiple calls to split
+            # are random for each initialization of splitter but consistent
+            # across multiple calls for the same initialization.
+            self.random_state = check_random_state(
+                random_state).randint(np.iinfo(np.int32).max)

Member:
Gaël had instead suggested the use of get_state. Apart from the ability to represent many more than 2**32 possible states, it is faster to set the state than to seed a new RandomState.

I'm okay with this though.

Member:
Doing check_random_state(self.random_state).get_state() in all cases will ensure similar behaviour regardless of the form of the parameter.

Member Author:
Sorry, could you elaborate a bit more? get_state gets me a tuple from the RandomState(...) object which represents its state... Do you mean we store this as random_state? (That garbles the repr...)

Member:
What do you mean by "garble"? Show a RandomState object? That's fine, isn't it?

+        else:
+            self.random_state = random_state
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
@@ -559,10 +567,7 @@ def __init__(self, n_splits=3, shuffle=False, random_state=None):
         super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state)
 
     def _make_test_folds(self, X, y=None, groups=None):
-        if self.shuffle:
-            rng = check_random_state(self.random_state)
-        else:
-            rng = self.random_state
+        rng = check_random_state(self.random_state)
         y = np.asarray(y)
         n_samples = y.shape[0]
         unique_y, y_inversed = np.unique(y, return_inverse=True)
@@ -922,7 +927,14 @@ def __init__(self, n_splits=10, test_size=0.1, train_size=None,
         self.n_splits = n_splits
         self.test_size = test_size
         self.train_size = train_size
-        self.random_state = random_state
+        if isinstance(random_state, (np.integer, numbers.Integral)):
+            self.random_state = random_state
+        else:
+            # This is done to ensure that the multiple calls to split
+            # are random for each initialization of splitter but consistent
+            # across multiple calls for the same initialization.
+            self.random_state = check_random_state(
+                random_state).randint(np.iinfo(np.int32).max)
 
     def split(self, X, y=None, groups=None):
         """Generate indices to split data into training and test set.
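
Taken together, the _split.py changes aim to guarantee the behaviour sketched below: with random_state=None, one splitter instance is consistent across its own split calls, while separately constructed instances differ. Note this illustrates the patched branch (the PR is marked Closed), not necessarily released scikit-learn:

import numpy as np
from sklearn.model_selection import KFold

X = np.ones(12)
y = [0] * 6 + [1] * 6

# Under the patch, random_state=None is resolved to a concrete seed in
# __init__, so the same instance keeps yielding identical splits...
cv = KFold(3, shuffle=True)  # random_state left as None
first = [test.tolist() for _, test in cv.split(X, y)]
second = [test.tolist() for _, test in cv.split(X, y)]
assert first == second

# ...while a separately constructed instance draws its own seed and will
# almost surely shuffle differently.
other = KFold(3, shuffle=True)
print(first == [test.tolist() for _, test in other.split(X, y)])  # likely False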
sklearn/model_selection/tests/test_split.py: 49 changes (33 additions, 16 deletions)

@@ -151,7 +151,7 @@ def test_cross_validator_with_default_params():
     skf = StratifiedKFold(n_splits)
     lolo = LeaveOneGroupOut()
     lopo = LeavePGroupsOut(p)
-    ss = ShuffleSplit(random_state=0)
+    ss = ShuffleSplit(random_state=42)
     ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = np of unique folds = 2
 
     loo_repr = "LeaveOneOut()"
@@ -160,7 +160,7 @@ def test_cross_validator_with_default_params():
     skf_repr = "StratifiedKFold(n_splits=2, random_state=None, shuffle=False)"
     lolo_repr = "LeaveOneGroupOut()"
     lopo_repr = "LeavePGroupsOut(n_groups=2)"
-    ss_repr = ("ShuffleSplit(n_splits=10, random_state=0, test_size=0.1, "
+    ss_repr = ("ShuffleSplit(n_splits=10, random_state=42, test_size=0.1, "
                "train_size=None)")
     ps_repr = "PredefinedSplit(test_fold=array([1, 1, 2, 2]))"

@@ -423,26 +423,43 @@ def test_shuffle_kfold_stratifiedkfold_reproducibility():
     X2 = np.ones(16)  # Not divisible by 3
     y2 = [0] * 8 + [1] * 8
 
+    # random_state set to int with shuffle=True
     kf = KFold(3, shuffle=True, random_state=0)
     skf = StratifiedKFold(3, shuffle=True, random_state=0)
-
-    for cv in (kf, skf):
-        np.testing.assert_equal(list(cv.split(X, y)), list(cv.split(X, y)))
-        np.testing.assert_equal(list(cv.split(X2, y2)), list(cv.split(X2, y2)))
-
-    kf = KFold(3, shuffle=True)
-    skf = StratifiedKFold(3, shuffle=True)
-
-    for cv in (kf, skf):
-        for data in zip((X, X2), (y, y2)):
+    # random_state set to RandomState object with shuffle=True
+    kf2 = KFold(3, shuffle=True, random_state=np.random.RandomState(0))
+    skf2 = StratifiedKFold(3, shuffle=True,
+                           random_state=np.random.RandomState(0))
+    # random_state not set with shuffle=True
+    kf3 = KFold(3, shuffle=True)
+    skf3 = StratifiedKFold(3, shuffle=True)
+
+    # 1) Test to ensure consistent behavior for multiple split calls
+    # irrespective of random_state
+    for cv in (kf, skf, kf2, skf2, kf3, skf3):
+        for data in ((X, y), (X2, y2)):
+            # Check that calling split twice yields the same results
+            np.testing.assert_equal(list(cv.split(*data)),
+                                    list(cv.split(*data)))
+
+    # 2) Tests to ensure different initilization produce different splits,
+    # when random_state is not set
+    kf1 = KFold(3, shuffle=True)
+    skf1 = StratifiedKFold(3, shuffle=True)
+    kf2 = KFold(3, shuffle=True)
+    skf2 = StratifiedKFold(3, shuffle=True)
+    for cv1, cv2 in ((kf1, kf2), (skf1, skf2)):
+        for data in ((X, y), (X2, y2)):
+            # For different initialisations, splits should not be same when
+            # random_state is not set.
             try:
-                np.testing.assert_equal(list(cv.split(*data)),
-                                        list(cv.split(*data)))
+                np.testing.assert_equal(list(cv1.split(*data)),

Member:
I still think this is an ugly pattern but whatever... Is this the only place where we use this? Otherwise we might want to put this into utils?

Member Author:
Maybe for another PR? There are quite a few places (even outside model_selection) using np.testing.assert_*equal...

Member:
Sure. Though I was referring to the try: else: construct.

+                                        list(cv2.split(*data)))
             except AssertionError:
                 pass
             else:
-                raise AssertionError("The splits for data, %s, are same even "
-                                     "when random state is not set" % data)
+                raise AssertionError("When random_state is not set, the splits"
+                                     " are same for different initializations")


 def test_shuffle_stratifiedkfold():
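
If the try/except/else construct were ever factored into a shared utility as the review suggests, a helper along these lines might do it; assert_splits_differ is a hypothetical name, not an existing scikit-learn function:

import numpy as np

def assert_splits_differ(cv1, cv2, X, y):
    # Hypothetical test helper: succeed when two independently constructed
    # splitters produce different splits, fail only when they coincide.
    try:
        np.testing.assert_equal(list(cv1.split(X, y)),
                                list(cv2.split(X, y)))
    except AssertionError:
        return  # splits differ, as expected when random_state is unset
    raise AssertionError("Splits are identical although random_state "
                         "was not set")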