|
12 | 12 | from sklearn.utils.testing import assert_almost_equal
|
13 | 13 | from sklearn.utils.testing import assert_raises
|
14 | 14 | from sklearn.utils.testing import assert_greater
|
| 15 | +from sklearn.utils.testing import assert_greater_equal |
15 | 16 | from sklearn.utils.testing import assert_less
|
16 | 17 | from sklearn.utils.testing import assert_not_equal
|
17 | 18 | from sklearn.utils.testing import assert_array_almost_equal
|
@@ -1042,55 +1043,53 @@ def test_check_is_partition():
|
1042 | 1043 |
|
1043 | 1044 | def test_subject_independent_folds():
|
1044 | 1045 | """ Check that the function produces equilibrated folds
|
1045 |
| - with no subject appearing in two different folds |
| 1046 | + with no group appearing in two different folds |
1046 | 1047 | """
|
1047 | 1048 | # Fix the seed for reproducibility
|
1048 |
| - np.random.seed(0) |
| 1049 | + rng = np.random.RandomState(0) |
1049 | 1050 |
|
1050 | 1051 | # Parameters of the test
|
1051 |
| - n_subjects = 15 |
| 1052 | + n_groups = 15 |
1052 | 1053 | n_samples = 1000
|
1053 | 1054 | n_folds = 5
|
1054 | 1055 |
|
1055 | 1056 | # Construct the test data
|
1056 | 1057 | tolerance = 0.05 * n_samples # 5 percent error allowed
|
1057 |
| - subjects = np.random.randint(0, n_subjects, n_samples) |
1058 |
| - folds = cval.subject_independent_folds(subjects, n_folds) |
1059 |
| - ideal_n_subjects_per_fold = n_samples // n_folds |
| 1058 | + groups = np.random.randint(0, n_groups, n_samples) |
| 1059 | + folds = cval.disjoint_group_folds(groups, n_folds) |
| 1060 | + ideal_n_groups_per_fold = n_samples // n_folds |
1060 | 1061 |
|
1061 | 1062 | # Check that folds have approximately the same size
|
1062 |
| - assert(len(folds)==len(subjects)) |
| 1063 | + assert_equal(len(folds), len(groups)) |
1063 | 1064 | for i in np.unique(folds):
|
1064 |
| - assert(abs(sum(folds == i) - ideal_n_subjects_per_fold) <= tolerance) |
| 1065 | + assert_greater_equal(tolerance, abs(sum(folds == i) - ideal_n_groups_per_fold)) |
1065 | 1066 |
|
1066 | 1067 | # Check that each group appears in only one fold
|
1067 |
| - for subject in np.unique(subjects): |
1068 |
| - assert(len(np.unique(folds[subjects == subject])) == 1) |
| 1068 | + for group in np.unique(groups): |
| 1069 | + assert_equal(len(np.unique(folds[groups == group])), 1) |
1069 | 1070 |
|
1070 |
| - subjects = ['Albert', 'Jean', 'Bertrand', 'Michel', 'Jean', |
| 1071 | + # Construct the test data |
| 1072 | + groups = ['Albert', 'Jean', 'Bertrand', 'Michel', 'Jean', |
1071 | 1073 | 'Francis', 'Robert', 'Michel', 'Rachel', 'Lois',
|
1072 | 1074 | 'Michelle', 'Bernard', 'Marion', 'Laura', 'Jean',
|
1073 | 1075 | 'Rachel', 'Franck', 'John', 'Gael', 'Anna', 'Alix',
|
1074 | 1076 | 'Robert', 'Marion', 'David', 'Tony', 'Abel', 'Becky',
|
1075 | 1077 | 'Madmood', 'Cary', 'Mary', 'Alexandre', 'David', 'Francis',
|
1076 | 1078 | 'Barack', 'Abdoul', 'Rasha', 'Xi', 'Silvia']
|
1077 | 1079 |
|
1078 |
| - n_subjects = len(np.unique(subjects)) |
1079 |
| - n_samples = len(subjects) |
| 1080 | + n_groups = len(np.unique(groups)) |
| 1081 | + n_samples = len(groups) |
1080 | 1082 | n_folds = 5
|
1081 |
| - |
1082 |
| - # Construct the test data |
1083 | 1083 | tolerance = 0.05 * n_samples # 5 percent error allowed
|
1084 |
| - subjects = np.random.randint(0, n_subjects, n_samples) |
1085 |
| - folds = cval.subject_independent_folds(subjects, n_folds) |
1086 |
| - ideal_n_subjects_per_fold = n_samples // n_folds |
| 1084 | + folds = cval.disjoint_group_folds(groups, n_folds) |
| 1085 | + ideal_n_groups_per_fold = n_samples // n_folds |
1087 | 1086 |
|
1088 | 1087 | # Check that folds have approximately the same size
|
1089 |
| - assert(len(folds)==len(subjects)) |
| 1088 | + assert_equal(len(folds), len(groups)) |
1090 | 1089 | for i in np.unique(folds):
|
1091 |
| - assert(abs(sum(folds == i) - ideal_n_subjects_per_fold) <= tolerance) |
| 1090 | + assert_greater_equal(tolerance, abs(sum(folds == i) - ideal_n_groups_per_fold)) |
1092 | 1091 |
|
1093 | 1092 | # Check that each group appears in only one fold
|
1094 |
| - for subject in np.unique(subjects): |
1095 |
| - assert(len(np.unique(folds[subjects == subject])) == 1) |
| 1093 | + for group in np.unique(groups): |
| 1094 | + assert_equal(len(np.unique(folds[groups == group])), 1) |
1096 | 1095 |
|
0 commit comments