@@ -32,23 +32,23 @@ def _check_positive_coding(method, positive):
32
32
)
33
33
34
34
35
- def _sparse_encode (
35
+ def _sparse_encode_precomputed (
36
36
X ,
37
37
dictionary ,
38
- gram ,
38
+ * ,
39
+ gram = None ,
39
40
cov = None ,
40
41
algorithm = "lasso_lars" ,
41
42
regularization = None ,
42
43
copy_cov = True ,
43
44
init = None ,
44
45
max_iter = 1000 ,
45
- check_input = True ,
46
46
verbose = 0 ,
47
47
positive = False ,
48
48
):
49
- """Generic sparse coding.
49
+ """Generic sparse coding with precomputed Gram and/or covariance matrices .
50
50
51
- Each column of the result is the solution to a Lasso problem.
51
+ Each row of the result is the solution to a Lasso problem.
52
52
53
53
Parameters
54
54
----------
@@ -59,7 +59,7 @@ def _sparse_encode(
59
59
The dictionary matrix against which to solve the sparse coding of
60
60
the data. Some of the algorithms assume normalized rows.
61
61
62
- gram : ndarray of shape (n_components, n_components) or None
62
+ gram : ndarray of shape (n_components, n_components), default=None
63
63
Precomputed Gram matrix, `dictionary * dictionary'`
64
64
gram can be `None` if algorithm is 'threshold'.
65
65
@@ -98,9 +98,6 @@ def _sparse_encode(
98
98
Whether to copy the precomputed covariance matrix; if `False`, it may
99
99
be overwritten.
100
100
101
- check_input : bool, default=True
102
- If `False`, the input arrays `X` and dictionary will not be checked.
103
-
104
101
verbose : int, default=0
105
102
Controls the verbosity; the higher, the more messages.
106
103
@@ -113,29 +110,9 @@ def _sparse_encode(
113
110
-------
114
111
code : ndarray of shape (n_samples, n_components)
115
112
The sparse codes.
116
-
117
- See Also
118
- --------
119
- sklearn.linear_model.lars_path
120
- sklearn.linear_model.orthogonal_mp
121
- sklearn.linear_model.Lasso
122
- SparseCoder
123
113
"""
124
- if X .ndim == 1 :
125
- X = X [:, np .newaxis ]
126
114
n_samples , n_features = X .shape
127
115
n_components = dictionary .shape [0 ]
128
- if dictionary .shape [1 ] != X .shape [1 ]:
129
- raise ValueError (
130
- "Dictionary and X have different numbers of features:"
131
- "dictionary.shape: {} X.shape{}" .format (dictionary .shape , X .shape )
132
- )
133
- if cov is None and algorithm != "lasso_cd" :
134
- # overwriting cov is safe
135
- copy_cov = False
136
- cov = np .dot (dictionary , X .T )
137
-
138
- _check_positive_coding (algorithm , positive )
139
116
140
117
if algorithm == "lasso_lars" :
141
118
alpha = float (regularization ) / n_features # account for scaling
@@ -183,7 +160,7 @@ def _sparse_encode(
183
160
init = np .array (init )
184
161
clf .coef_ = init
185
162
186
- clf .fit (dictionary .T , X .T , check_input = check_input )
163
+ clf .fit (dictionary .T , X .T , check_input = False )
187
164
new_code = clf .coef_
188
165
189
166
elif algorithm == "lars" :
@@ -218,14 +195,8 @@ def _sparse_encode(
218
195
norms_squared = row_norms (X , squared = True ),
219
196
copy_Xy = copy_cov ,
220
197
).T
221
- else :
222
- raise ValueError (
223
- 'Sparse coding method must be "lasso_lars" '
224
- '"lasso_cd", "lasso", "threshold" or "omp", got %s.' % algorithm
225
- )
226
- if new_code .ndim != 2 :
227
- return new_code .reshape (n_samples , n_components )
228
- return new_code
198
+
199
+ return new_code .reshape (n_samples , n_components )
229
200
230
201
231
202
@validate_params (
@@ -375,15 +346,51 @@ def sparse_encode(
375
346
dictionary = check_array (dictionary )
376
347
X = check_array (X )
377
348
378
- n_samples , n_features = X .shape
379
- n_components = dictionary .shape [0 ]
349
+ if dictionary .shape [1 ] != X .shape [1 ]:
350
+ raise ValueError (
351
+ "Dictionary and X have different numbers of features:"
352
+ "dictionary.shape: {} X.shape{}" .format (dictionary .shape , X .shape )
353
+ )
380
354
381
- if gram is None and algorithm != "threshold" :
382
- gram = np .dot (dictionary , dictionary .T )
355
+ _check_positive_coding (algorithm , positive )
383
356
384
- if cov is None and algorithm != "lasso_cd" :
385
- copy_cov = False
386
- cov = np .dot (dictionary , X .T )
357
+ return _sparse_encode (
358
+ X ,
359
+ dictionary ,
360
+ gram = gram ,
361
+ cov = cov ,
362
+ algorithm = algorithm ,
363
+ n_nonzero_coefs = n_nonzero_coefs ,
364
+ alpha = alpha ,
365
+ copy_cov = copy_cov ,
366
+ init = init ,
367
+ max_iter = max_iter ,
368
+ n_jobs = n_jobs ,
369
+ verbose = verbose ,
370
+ positive = positive ,
371
+ )
372
+
373
+
374
+ def _sparse_encode (
375
+ X ,
376
+ dictionary ,
377
+ * ,
378
+ gram = None ,
379
+ cov = None ,
380
+ algorithm = "lasso_lars" ,
381
+ n_nonzero_coefs = None ,
382
+ alpha = None ,
383
+ copy_cov = True ,
384
+ init = None ,
385
+ max_iter = 1000 ,
386
+ n_jobs = None ,
387
+ verbose = 0 ,
388
+ positive = False ,
389
+ ):
390
+ """Sparse coding without input/parameter validation."""
391
+
392
+ n_samples , n_features = X .shape
393
+ n_components = dictionary .shape [0 ]
387
394
388
395
if algorithm in ("lars" , "omp" ):
389
396
regularization = n_nonzero_coefs
@@ -394,39 +401,46 @@ def sparse_encode(
394
401
if regularization is None :
395
402
regularization = 1.0
396
403
404
+ if gram is None and algorithm != "threshold" :
405
+ gram = np .dot (dictionary , dictionary .T )
406
+
407
+ if cov is None and algorithm != "lasso_cd" :
408
+ copy_cov = False
409
+ cov = np .dot (dictionary , X .T )
410
+
397
411
if effective_n_jobs (n_jobs ) == 1 or algorithm == "threshold" :
398
- code = _sparse_encode (
412
+ code = _sparse_encode_precomputed (
399
413
X ,
400
414
dictionary ,
401
- gram ,
415
+ gram = gram ,
402
416
cov = cov ,
403
417
algorithm = algorithm ,
404
418
regularization = regularization ,
405
419
copy_cov = copy_cov ,
406
420
init = init ,
407
421
max_iter = max_iter ,
408
- check_input = False ,
409
422
verbose = verbose ,
410
423
positive = positive ,
411
424
)
412
425
return code
413
426
414
427
# Enter parallel code block
428
+ n_samples = X .shape [0 ]
429
+ n_components = dictionary .shape [0 ]
415
430
code = np .empty ((n_samples , n_components ))
416
431
slices = list (gen_even_slices (n_samples , effective_n_jobs (n_jobs )))
417
432
418
433
code_views = Parallel (n_jobs = n_jobs , verbose = verbose )(
419
- delayed (_sparse_encode )(
434
+ delayed (_sparse_encode_precomputed )(
420
435
X [this_slice ],
421
436
dictionary ,
422
- gram ,
423
- cov [:, this_slice ] if cov is not None else None ,
424
- algorithm ,
437
+ gram = gram ,
438
+ cov = cov [:, this_slice ] if cov is not None else None ,
439
+ algorithm = algorithm ,
425
440
regularization = regularization ,
426
441
copy_cov = copy_cov ,
427
442
init = init [this_slice ] if init is not None else None ,
428
443
max_iter = max_iter ,
429
- check_input = False ,
430
444
verbose = verbose ,
431
445
positive = positive ,
432
446
)
@@ -2205,13 +2219,12 @@ def _minibatch_step(self, X, dictionary, random_state, step):
2205
2219
batch_size = X .shape [0 ]
2206
2220
2207
2221
# Compute code for this batch
2208
- code = sparse_encode (
2222
+ code = _sparse_encode (
2209
2223
X ,
2210
2224
dictionary ,
2211
2225
algorithm = self ._fit_algorithm ,
2212
2226
alpha = self .alpha ,
2213
2227
n_jobs = self .n_jobs ,
2214
- check_input = False ,
2215
2228
positive = self .positive_code ,
2216
2229
max_iter = self .transform_max_iter ,
2217
2230
verbose = self .verbose ,
0 commit comments