6
6
import traceback
7
7
import pickle
8
8
from copy import deepcopy
9
+ import struct
10
+ from functools import partial
11
+
9
12
import numpy as np
10
13
from scipy import sparse
11
14
from scipy .stats import rankdata
12
- import struct
13
15
14
16
from sklearn .externals .six .moves import zip
15
17
from sklearn .externals .joblib import hash , Memory
33
35
from sklearn .utils .testing import SkipTest
34
36
from sklearn .utils .testing import ignore_warnings
35
37
from sklearn .utils .testing import assert_dict_equal
38
+ from sklearn .utils .testing import create_memmap_backed_data
36
39
from sklearn .discriminant_analysis import LinearDiscriminantAnalysis
37
40
38
41
@@ -84,6 +87,7 @@ def _yield_non_meta_checks(name, estimator):
84
87
yield check_sample_weights_pandas_series
85
88
yield check_sample_weights_list
86
89
yield check_estimators_fit_returns_self
90
+ yield partial (check_estimators_fit_returns_self , readonly_memmap = True )
87
91
yield check_complex_data
88
92
89
93
# Check that all estimators yield informative messages when
@@ -123,6 +127,7 @@ def _yield_classifier_checks(name, classifier):
123
127
yield check_estimators_partial_fit_n_features
124
128
# basic consistency testing
125
129
yield check_classifiers_train
130
+ yield partial (check_classifiers_train , readonly_memmap = True )
126
131
yield check_classifiers_regression_target
127
132
if (name not in ["MultinomialNB" , "ComplementNB" , "LabelPropagation" ,
128
133
"LabelSpreading" ] and
@@ -171,6 +176,7 @@ def _yield_regressor_checks(name, regressor):
171
176
# TODO: test with multiple responses
172
177
# basic testing
173
178
yield check_regressors_train
179
+ yield partial (check_regressors_train , readonly_memmap = True )
174
180
yield check_regressor_data_not_an_array
175
181
yield check_estimators_partial_fit_n_features
176
182
yield check_regressors_no_decision_function
@@ -196,6 +202,7 @@ def _yield_transformer_checks(name, transformer):
196
202
'FunctionTransformer' , 'Normalizer' ]:
197
203
# basic tests
198
204
yield check_transformer_general
205
+ yield partial (check_transformer_general , readonly_memmap = True )
199
206
yield check_transformers_unfitted
200
207
# Dependent on external solvers and hence accessing the iter
201
208
# param is non-trivial.
@@ -211,6 +218,7 @@ def _yield_clustering_checks(name, clusterer):
211
218
# this is clustering on the features
212
219
# let's not test that here.
213
220
yield check_clustering
221
+ yield partial (check_clustering , readonly_memmap = True )
214
222
yield check_estimators_partial_fit_n_features
215
223
yield check_non_transformer_estimators_n_iter
216
224
@@ -223,6 +231,7 @@ def _yield_outliers_checks(name, estimator):
223
231
# checks for estimators that can be used on a test set
224
232
if hasattr (estimator , 'predict' ):
225
233
yield check_outliers_train
234
+ yield partial (check_outliers_train , readonly_memmap = True )
226
235
# test outlier detectors can handle non-array data
227
236
yield check_classifier_data_not_an_array
228
237
# test if NotFittedError is raised
@@ -799,14 +808,18 @@ def check_fit1d(name, estimator_orig):
799
808
800
809
801
810
@ignore_warnings (category = (DeprecationWarning , FutureWarning ))
802
- def check_transformer_general (name , transformer ):
811
+ def check_transformer_general (name , transformer , readonly_memmap = False ):
803
812
X , y = make_blobs (n_samples = 30 , centers = [[0 , 0 , 0 ], [1 , 1 , 1 ]],
804
813
random_state = 0 , n_features = 2 , cluster_std = 0.1 )
805
814
X = StandardScaler ().fit_transform (X )
806
815
X -= X .min ()
807
816
if name == 'PowerTransformer' :
808
817
# Box-Cox requires positive, non-zero data
809
818
X += 1
819
+
820
+ if readonly_memmap :
821
+ X , y = create_memmap_backed_data ([X , y ])
822
+
810
823
_check_transformer (name , transformer , X , y )
811
824
_check_transformer (name , transformer , X .tolist (), y .tolist ())
812
825
@@ -1165,11 +1178,17 @@ def check_estimators_partial_fit_n_features(name, estimator_orig):
1165
1178
1166
1179
1167
1180
@ignore_warnings (category = (DeprecationWarning , FutureWarning ))
1168
- def check_clustering (name , clusterer_orig ):
1181
+ def check_clustering (name , clusterer_orig , readonly_memmap = False ):
1169
1182
clusterer = clone (clusterer_orig )
1170
1183
X , y = make_blobs (n_samples = 50 , random_state = 1 )
1171
1184
X , y = shuffle (X , y , random_state = 7 )
1172
1185
X = StandardScaler ().fit_transform (X )
1186
+ rng = np .random .RandomState (7 )
1187
+ X_noise = np .concatenate ([X , rng .uniform (low = - 3 , high = 3 , size = (5 , 2 ))])
1188
+
1189
+ if readonly_memmap :
1190
+ X , y , X_noise = create_memmap_backed_data ([X , y , X_noise ])
1191
+
1173
1192
n_samples , n_features = X .shape
1174
1193
# catch deprecation and neighbors warnings
1175
1194
if hasattr (clusterer , "n_clusters" ):
@@ -1201,8 +1220,6 @@ def check_clustering(name, clusterer_orig):
1201
1220
assert_in (pred2 .dtype , [np .dtype ('int32' ), np .dtype ('int64' )])
1202
1221
1203
1222
# Add noise to X to test the possible values of the labels
1204
- rng = np .random .RandomState (7 )
1205
- X_noise = np .concatenate ([X , rng .uniform (low = - 3 , high = 3 , size = (5 , 2 ))])
1206
1223
labels = clusterer .fit_predict (X_noise )
1207
1224
1208
1225
# There should be at least one sample in every cluster. Equivalently
@@ -1273,20 +1290,26 @@ def check_classifiers_one_label(name, classifier_orig):
1273
1290
1274
1291
1275
1292
@ignore_warnings # Warnings are raised by decision function
1276
- def check_classifiers_train (name , classifier_orig ):
1293
+ def check_classifiers_train (name , classifier_orig , readonly_memmap = False ):
1277
1294
X_m , y_m = make_blobs (n_samples = 300 , random_state = 0 )
1278
1295
X_m , y_m = shuffle (X_m , y_m , random_state = 7 )
1279
1296
X_m = StandardScaler ().fit_transform (X_m )
1280
1297
# generate binary problem from multi-class one
1281
1298
y_b = y_m [y_m != 2 ]
1282
1299
X_b = X_m [y_m != 2 ]
1300
+
1301
+ if name in ['BernoulliNB' , 'MultinomialNB' , 'ComplementNB' ]:
1302
+ X_m -= X_m .min ()
1303
+ X_b -= X_b .min ()
1304
+
1305
+ if readonly_memmap :
1306
+ X_m , y_m , X_b , y_b = create_memmap_backed_data ([X_m , y_m , X_b , y_b ])
1307
+
1283
1308
for (X , y ) in [(X_m , y_m ), (X_b , y_b )]:
1284
1309
classes = np .unique (y )
1285
1310
n_classes = len (classes )
1286
1311
n_samples , n_features = X .shape
1287
1312
classifier = clone (classifier_orig )
1288
- if name in ['BernoulliNB' , 'MultinomialNB' , 'ComplementNB' ]:
1289
- X -= X .min ()
1290
1313
X = pairwise_estimator_convert_X (X , classifier_orig )
1291
1314
set_random_state (classifier )
1292
1315
# raises error on malformed input for fit
@@ -1382,9 +1405,13 @@ def check_classifiers_train(name, classifier_orig):
1382
1405
assert_array_equal (np .argsort (y_log_prob ), np .argsort (y_prob ))
1383
1406
1384
1407
1385
- def check_outliers_train (name , estimator_orig ):
1408
+ def check_outliers_train (name , estimator_orig , readonly_memmap = False ):
1386
1409
X , _ = make_blobs (n_samples = 300 , random_state = 0 )
1387
1410
X = shuffle (X , random_state = 7 )
1411
+
1412
+ if readonly_memmap :
1413
+ X = create_memmap_backed_data (X )
1414
+
1388
1415
n_samples , n_features = X .shape
1389
1416
estimator = clone (estimator_orig )
1390
1417
set_random_state (estimator )
@@ -1444,7 +1471,8 @@ def check_outliers_train(name, estimator_orig):
1444
1471
1445
1472
1446
1473
@ignore_warnings (category = (DeprecationWarning , FutureWarning ))
1447
- def check_estimators_fit_returns_self (name , estimator_orig ):
1474
+ def check_estimators_fit_returns_self (name , estimator_orig ,
1475
+ readonly_memmap = False ):
1448
1476
"""Check if self is returned when calling fit"""
1449
1477
X , y = make_blobs (random_state = 0 , n_samples = 9 , n_features = 4 )
1450
1478
# some want non-negative input
@@ -1457,8 +1485,10 @@ def check_estimators_fit_returns_self(name, estimator_orig):
1457
1485
estimator = clone (estimator_orig )
1458
1486
y = multioutput_estimator_convert_y_2d (estimator , y )
1459
1487
1460
- set_random_state (estimator )
1488
+ if readonly_memmap :
1489
+ X , y = create_memmap_backed_data ([X , y ])
1461
1490
1491
+ set_random_state (estimator )
1462
1492
assert_true (estimator .fit (X , y ) is estimator )
1463
1493
1464
1494
@@ -1637,14 +1667,23 @@ def check_regressors_int(name, regressor_orig):
1637
1667
1638
1668
1639
1669
@ignore_warnings (category = (DeprecationWarning , FutureWarning ))
1640
- def check_regressors_train (name , regressor_orig ):
1670
+ def check_regressors_train (name , regressor_orig , readonly_memmap = False ):
1641
1671
X , y = _boston_subset ()
1642
1672
X = pairwise_estimator_convert_X (X , regressor_orig )
1643
1673
y = StandardScaler ().fit_transform (y .reshape (- 1 , 1 )) # X is already scaled
1644
1674
y = y .ravel ()
1645
1675
regressor = clone (regressor_orig )
1646
1676
y = multioutput_estimator_convert_y_2d (regressor , y )
1647
- rnd = np .random .RandomState (0 )
1677
+ if name in CROSS_DECOMPOSITION :
1678
+ rnd = np .random .RandomState (0 )
1679
+ y_ = np .vstack ([y , 2 * y + rnd .randint (2 , size = len (y ))])
1680
+ y_ = y_ .T
1681
+ else :
1682
+ y_ = y
1683
+
1684
+ if readonly_memmap :
1685
+ X , y , y_ = create_memmap_backed_data ([X , y , y_ ])
1686
+
1648
1687
if not hasattr (regressor , 'alphas' ) and hasattr (regressor , 'alpha' ):
1649
1688
# linear regressors need to set alpha, but not generalized CV ones
1650
1689
regressor .alpha = 0.01
@@ -1659,11 +1698,6 @@ def check_regressors_train(name, regressor_orig):
1659
1698
"labels. Perhaps use check_X_y in fit." .format (name )):
1660
1699
regressor .fit (X , y [:- 1 ])
1661
1700
# fit
1662
- if name in CROSS_DECOMPOSITION :
1663
- y_ = np .vstack ([y , 2 * y + rnd .randint (2 , size = len (y ))])
1664
- y_ = y_ .T
1665
- else :
1666
- y_ = y
1667
1701
set_random_state (regressor )
1668
1702
regressor .fit (X , y_ )
1669
1703
regressor .fit (X .tolist (), y_ .tolist ())
0 commit comments