@@ -169,7 +169,9 @@ def resample(*arrays, **options):
169
169
170
170
Parameters
171
171
----------
172
- *arrays : sequence of arrays or scipy.sparse matrices with same shape[0]
172
+ *arrays : sequence of indexable data-structures
173
+ Indexable data-structures can be arrays, lists, dataframes or scipy
174
+ sparse matrices with consistent first dimension.
173
175
174
176
replace : boolean, True by default
175
177
Implements resampling with replacement. If False, this will implement
@@ -184,16 +186,15 @@ def resample(*arrays, **options):
184
186
185
187
Returns
186
188
-------
187
- resampled_arrays : sequence of arrays or scipy.sparse matrices with same \
188
- shape[0]
189
- Sequence of resampled views of the collections. The original arrays are
189
+ resampled_arrays : sequence of indexable data-structures
190
+ Sequence of resampled views of the collections. The original arrays are
190
191
not impacted.
191
192
192
193
Examples
193
194
--------
194
195
It is possible to mix sparse and dense arrays in the same run::
195
196
196
- >>> X = [[1., 0.], [2., 1.], [0., 0.]]
197
+ >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
197
198
>>> y = np.array([0, 1, 2])
198
199
199
200
>>> from scipy.sparse import coo_matrix
@@ -247,8 +248,6 @@ def resample(*arrays, **options):
247
248
max_n_samples , n_samples ))
248
249
249
250
check_consistent_length (* arrays )
250
- arrays = [check_array (x , accept_sparse = 'csr' , ensure_2d = False ,
251
- allow_nd = True ) for x in arrays ]
252
251
253
252
if replace :
254
253
indices = random_state .randint (0 , n_samples , size = (max_n_samples ,))
@@ -257,12 +256,9 @@ def resample(*arrays, **options):
257
256
random_state .shuffle (indices )
258
257
indices = indices [:max_n_samples ]
259
258
260
- resampled_arrays = []
261
-
262
- for array in arrays :
263
- array = array [indices ]
264
- resampled_arrays .append (array )
265
-
259
+ # convert sparse matrices to CSR for row-based indexing
260
+ arrays = [a .tocsr () if issparse (a ) else a for a in arrays ]
261
+ resampled_arrays = [safe_indexing (a , indices ) for a in arrays ]
266
262
if len (resampled_arrays ) == 1 :
267
263
# syntactic sugar for the unit argument case
268
264
return resampled_arrays [0 ]
@@ -278,7 +274,9 @@ def shuffle(*arrays, **options):
278
274
279
275
Parameters
280
276
----------
281
- *arrays : sequence of arrays or scipy.sparse matrices with same shape[0]
277
+ *arrays : sequence of indexable data-structures
278
+ Indexable data-structures can be arrays, lists, dataframes or scipy
279
+ sparse matrices with consistent first dimension.
282
280
283
281
random_state : int or RandomState instance
284
282
Control the shuffling for reproducible behavior.
@@ -289,16 +287,15 @@ def shuffle(*arrays, **options):
289
287
290
288
Returns
291
289
-------
292
- shuffled_arrays : sequence of arrays or scipy.sparse matrices with same \
293
- shape[0]
290
+ shuffled_arrays : sequence of indexable data-structures
294
291
Sequence of shuffled views of the collections. The original arrays are
295
292
not impacted.
296
293
297
294
Examples
298
295
--------
299
296
It is possible to mix sparse and dense arrays in the same run::
300
297
301
- >>> X = [[1., 0.], [2., 1.], [0., 0.]]
298
+ >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
302
299
>>> y = np.array([0, 1, 2])
303
300
304
301
>>> from scipy.sparse import coo_matrix
0 commit comments