@@ -1133,7 +1133,7 @@ def __init__(self, n_splits=5, n_repeats=10, random_state=None):
1133
1133
class BaseShuffleSplit (with_metaclass (ABCMeta )):
1134
1134
"""Base class for ShuffleSplit and StratifiedShuffleSplit"""
1135
1135
1136
- def __init__ (self , n_splits = 10 , test_size = 0.1 , train_size = None ,
1136
+ def __init__ (self , n_splits = 10 , test_size = "default" , train_size = None ,
1137
1137
random_state = None ):
1138
1138
_validate_shuffle_split_init (test_size , train_size )
1139
1139
self .n_splits = n_splits
@@ -1211,16 +1211,20 @@ class ShuffleSplit(BaseShuffleSplit):
1211
1211
1212
1212
Parameters
1213
1213
----------
1214
- n_splits : int ( default 10)
1214
+ n_splits : int, default 10
1215
1215
Number of re-shuffling & splitting iterations.
1216
1216
1217
- test_size : float, int, or None, default 0.1
1218
- If float, should be between 0.0 and 1.0 and represent the
1219
- proportion of the dataset to include in the test split. If
1220
- int, represents the absolute number of test samples. If None,
1221
- the value is automatically set to the complement of the train size.
1222
-
1223
- train_size : float, int, or None (default is None)
1217
+ test_size : float, int, None, default=0.1
1218
+ If float, should be between 0.0 and 1.0 and represent the proportion
1219
+ of the dataset to include in the test split. If int, represents the
1220
+ absolute number of test samples. If None, the value is set to the
1221
+ complement of the train size. By default (the parameter is
1222
+ unspecified), the value is set to 0.1.
1223
+ The default will change in version 0.21. It will remain 0.1 only
1224
+ if ``train_size`` is unspecified, otherwise it will complement
1225
+ the specified ``train_size``.
1226
+
1227
+ train_size : float, int, or None, default=None
1224
1228
If float, should be between 0.0 and 1.0 and represent the
1225
1229
proportion of the dataset to include in the train split. If
1226
1230
int, represents the absolute number of train samples. If None,
@@ -1260,7 +1264,8 @@ class ShuffleSplit(BaseShuffleSplit):
1260
1264
1261
1265
def _iter_indices (self , X , y = None , groups = None ):
1262
1266
n_samples = _num_samples (X )
1263
- n_train , n_test = _validate_shuffle_split (n_samples , self .test_size ,
1267
+ n_train , n_test = _validate_shuffle_split (n_samples ,
1268
+ self .test_size ,
1264
1269
self .train_size )
1265
1270
rng = check_random_state (self .random_state )
1266
1271
for i in range (self .n_splits ):
@@ -1299,13 +1304,16 @@ class GroupShuffleSplit(ShuffleSplit):
1299
1304
n_splits : int (default 5)
1300
1305
Number of re-shuffling & splitting iterations.
1301
1306
1302
- test_size : float (default 0.2), int, or None
1303
- If float, should be between 0.0 and 1.0 and represent the
1304
- proportion of the groups to include in the test split. If
1305
- int, represents the absolute number of test groups. If None,
1306
- the value is automatically set to the complement of the train size.
1307
+ test_size : float, int, None, optional
1308
+ If float, should be between 0.0 and 1.0 and represent the proportion
1309
+ of the groups to include in the test split. If int, represents the
1310
+ absolute number of test groups. If None, the value is set to the
1311
+ complement of the train size. By default, the value is set to 0.2.
1312
+ The default will change in version 0.21. It will remain 0.2 only
1313
+ if ``train_size`` is unspecified, otherwise it will complement
1314
+ the specified ``train_size``.
1307
1315
1308
- train_size : float, int, or None ( default is None)
1316
+ train_size : float, int, or None, default is None
1309
1317
If float, should be between 0.0 and 1.0 and represent the
1310
1318
proportion of the groups to include in the train split. If
1311
1319
int, represents the absolute number of train groups. If None,
@@ -1319,8 +1327,16 @@ class GroupShuffleSplit(ShuffleSplit):
1319
1327
1320
1328
'''
1321
1329
1322
- def __init__ (self , n_splits = 5 , test_size = 0.2 , train_size = None ,
1330
+ def __init__ (self , n_splits = 5 , test_size = "default" , train_size = None ,
1323
1331
random_state = None ):
1332
+ if test_size == "default" :
1333
+ if train_size is not None :
1334
+ warnings .warn ("From version 0.21, test_size will always "
1335
+ "complement train_size unless both "
1336
+ "are specified." ,
1337
+ FutureWarning )
1338
+ test_size = 0.2
1339
+
1324
1340
super (GroupShuffleSplit , self ).__init__ (
1325
1341
n_splits = n_splits ,
1326
1342
test_size = test_size ,
@@ -1428,16 +1444,19 @@ class StratifiedShuffleSplit(BaseShuffleSplit):
1428
1444
1429
1445
Parameters
1430
1446
----------
1431
- n_splits : int ( default 10)
1447
+ n_splits : int, default 10
1432
1448
Number of re-shuffling & splitting iterations.
1433
1449
1434
- test_size : float (default 0.1), int, or None
1435
- If float, should be between 0.0 and 1.0 and represent the
1436
- proportion of the dataset to include in the test split. If
1437
- int, represents the absolute number of test samples. If None,
1438
- the value is automatically set to the complement of the train size.
1450
+ test_size : float, int, None, optional
1451
+ If float, should be between 0.0 and 1.0 and represent the proportion
1452
+ of the dataset to include in the test split. If int, represents the
1453
+ absolute number of test samples. If None, the value is set to the
1454
+ complement of the train size. By default, the value is set to 0.1.
1455
+ The default will change in version 0.21. It will remain 0.1 only
1456
+ if ``train_size`` is unspecified, otherwise it will complement
1457
+ the specified ``train_size``.
1439
1458
1440
- train_size : float, int, or None ( default is None)
1459
+ train_size : float, int, or None, default is None
1441
1460
If float, should be between 0.0 and 1.0 and represent the
1442
1461
proportion of the dataset to include in the train split. If
1443
1462
int, represents the absolute number of train samples. If None,
@@ -1468,7 +1487,7 @@ class StratifiedShuffleSplit(BaseShuffleSplit):
1468
1487
TRAIN: [0 2] TEST: [3 1]
1469
1488
"""
1470
1489
1471
- def __init__ (self , n_splits = 10 , test_size = 0.1 , train_size = None ,
1490
+ def __init__ (self , n_splits = 10 , test_size = "default" , train_size = None ,
1472
1491
random_state = None ):
1473
1492
super (StratifiedShuffleSplit , self ).__init__ (
1474
1493
n_splits , test_size , train_size , random_state )
@@ -1563,6 +1582,14 @@ def _validate_shuffle_split_init(test_size, train_size):
1563
1582
NOTE This does not take into account the number of samples which is known
1564
1583
only at split
1565
1584
"""
1585
+ if test_size == "default" :
1586
+ if train_size is not None :
1587
+ warnings .warn ("From version 0.21, test_size will always "
1588
+ "complement train_size unless both "
1589
+ "are specified." ,
1590
+ FutureWarning )
1591
+ test_size = 0.1
1592
+
1566
1593
if test_size is None and train_size is None :
1567
1594
raise ValueError ('test_size and train_size can not both be None' )
1568
1595
@@ -1597,16 +1624,21 @@ def _validate_shuffle_split(n_samples, test_size, train_size):
1597
1624
Validation helper to check if the train/test sizes are meaningful wrt the
1598
1625
size of the data (n_samples)
1599
1626
"""
1600
- if (test_size is not None and np .asarray (test_size ).dtype .kind == 'i' and
1627
+ if (test_size is not None and
1628
+ np .asarray (test_size ).dtype .kind == 'i' and
1601
1629
test_size >= n_samples ):
1602
1630
raise ValueError ('test_size=%d should be smaller than the number of '
1603
1631
'samples %d' % (test_size , n_samples ))
1604
1632
1605
- if (train_size is not None and np .asarray (train_size ).dtype .kind == 'i' and
1633
+ if (train_size is not None and
1634
+ np .asarray (train_size ).dtype .kind == 'i' and
1606
1635
train_size >= n_samples ):
1607
1636
raise ValueError ("train_size=%d should be smaller than the number of"
1608
1637
" samples %d" % (train_size , n_samples ))
1609
1638
1639
+ if test_size == "default" :
1640
+ test_size = 0.1
1641
+
1610
1642
if np .asarray (test_size ).dtype .kind == 'f' :
1611
1643
n_test = ceil (test_size * n_samples )
1612
1644
elif np .asarray (test_size ).dtype .kind == 'i' :
@@ -1844,14 +1876,16 @@ def train_test_split(*arrays, **options):
1844
1876
Allowed inputs are lists, numpy arrays, scipy-sparse
1845
1877
matrices or pandas dataframes.
1846
1878
1847
- test_size : float, int, or None (default is None)
1848
- If float, should be between 0.0 and 1.0 and represent the
1849
- proportion of the dataset to include in the test split. If
1850
- int, represents the absolute number of test samples. If None,
1851
- the value is automatically set to the complement of the train size.
1852
- If train size is also None, test size is set to 0.25.
1879
+ test_size : float, int, None, optional
1880
+ If float, should be between 0.0 and 1.0 and represent the proportion
1881
+ of the dataset to include in the test split. If int, represents the
1882
+ absolute number of test samples. If None, the value is set to the
1883
+ complement of the train size. By default, the value is set to 0.25.
1884
+ The default will change in version 0.21. It will remain 0.25 only
1885
+ if ``train_size`` is unspecified, otherwise it will complement
1886
+ the specified ``train_size``.
1853
1887
1854
- train_size : float, int, or None ( default is None)
1888
+ train_size : float, int, or None, default None
1855
1889
If float, should be between 0.0 and 1.0 and represent the
1856
1890
proportion of the dataset to include in the train split. If
1857
1891
int, represents the absolute number of train samples. If None,
@@ -1917,7 +1951,7 @@ def train_test_split(*arrays, **options):
1917
1951
n_arrays = len (arrays )
1918
1952
if n_arrays == 0 :
1919
1953
raise ValueError ("At least one array required as input" )
1920
- test_size = options .pop ('test_size' , None )
1954
+ test_size = options .pop ('test_size' , 'default' )
1921
1955
train_size = options .pop ('train_size' , None )
1922
1956
random_state = options .pop ('random_state' , None )
1923
1957
stratify = options .pop ('stratify' , None )
@@ -1926,6 +1960,14 @@ def train_test_split(*arrays, **options):
1926
1960
if options :
1927
1961
raise TypeError ("Invalid parameters passed: %s" % str (options ))
1928
1962
1963
+ if test_size == 'default' :
1964
+ test_size = None
1965
+ if train_size is not None :
1966
+ warnings .warn ("From version 0.21, test_size will always "
1967
+ "complement train_size unless both "
1968
+ "are specified." ,
1969
+ FutureWarning )
1970
+
1929
1971
if test_size is None and train_size is None :
1930
1972
test_size = 0.25
1931
1973
0 commit comments