@@ -235,17 +235,18 @@ def fit(self, X, Y):
235
235
236
236
# copy since this will contain the residual (deflated) matrices
237
237
check_consistent_length (X , Y )
238
- X = check_array (X , dtype = np .float , copy = self .copy )
239
- Y = check_array (Y , dtype = np .float , copy = self .copy , ensure_2d = False )
238
+ X = check_array (X , dtype = np .float64 , copy = self .copy )
239
+ Y = check_array (Y , dtype = np .float64 , copy = self .copy , ensure_2d = False )
240
240
if Y .ndim == 1 :
241
- Y = Y [:, None ]
241
+ Y = Y . reshape ( - 1 , 1 )
242
242
243
243
n = X .shape [0 ]
244
244
p = X .shape [1 ]
245
245
q = Y .shape [1 ]
246
246
247
247
if self .n_components < 1 or self .n_components > p :
248
- raise ValueError ('invalid number of components' )
248
+ raise ValueError ('Invalid number of components: %d' %
249
+ self .n_components )
249
250
if self .algorithm not in ("svd" , "nipals" ):
250
251
raise ValueError ("Got algorithm %s when only 'svd' "
251
252
"and 'nipals' are known" % self .algorithm )
@@ -271,6 +272,10 @@ def fit(self, X, Y):
271
272
272
273
# NIPALS algo: outer loop, over components
273
274
for k in range (self .n_components ):
275
+ if np .all (np .dot (Yk .T , Yk ) < np .finfo (np .double ).eps ):
276
+ # Yk constant
277
+ warnings .warn ('Y residual constant at iteration %s' % k )
278
+ break
274
279
#1) weights estimation (inner loop)
275
280
# -----------------------------------
276
281
if self .algorithm == "nipals" :
@@ -291,6 +296,7 @@ def fit(self, X, Y):
291
296
# test for null variance
292
297
if np .dot (x_scores .T , x_scores ) < np .finfo (np .double ).eps :
293
298
warnings .warn ('X scores are null at iteration %s' % k )
299
+ break
294
300
#2) Deflation (in place)
295
301
# ----------------------
296
302
# Possible memory footprint reduction may be done here: in order to
@@ -335,6 +341,7 @@ def fit(self, X, Y):
335
341
self .y_rotations_ = np .ones (1 )
336
342
337
343
if True or self .deflation_mode == "regression" :
344
+ # FIXME what's with the if?
338
345
# Estimate regression coefficient
339
346
# Regress Y on T
340
347
# Y = TQ' + Err,
@@ -367,23 +374,19 @@ def transform(self, X, Y=None, copy=True):
367
374
x_scores if Y is not given, (x_scores, y_scores) otherwise.
368
375
"""
369
376
check_is_fitted (self , 'x_mean_' )
377
+ X = check_array (X , copy = copy )
370
378
# Normalize
371
- if copy :
372
- Xc = (np .asarray (X ) - self .x_mean_ ) / self .x_std_
373
- if Y is not None :
374
- Yc = (np .asarray (Y ) - self .y_mean_ ) / self .y_std_
375
- else :
376
- X = np .asarray (X )
377
- Xc -= self .x_mean_
378
- Xc /= self .x_std_
379
- if Y is not None :
380
- Y = np .asarray (Y )
381
- Yc -= self .y_mean_
382
- Yc /= self .y_std_
379
+ X -= self .x_mean_
380
+ X /= self .x_std_
383
381
# Apply rotation
384
- x_scores = np .dot (Xc , self .x_rotations_ )
382
+ x_scores = np .dot (X , self .x_rotations_ )
385
383
if Y is not None :
386
- y_scores = np .dot (Yc , self .y_rotations_ )
384
+ Y = check_array (Y , ensure_2d = False , copy = copy )
385
+ if Y .ndim == 1 :
386
+ Y = Y .reshape (- 1 , 1 )
387
+ Y -= self .y_mean_
388
+ Y /= self .y_std_
389
+ y_scores = np .dot (Y , self .y_rotations_ )
387
390
return x_scores , y_scores
388
391
389
392
return x_scores
@@ -406,14 +409,11 @@ def predict(self, X, copy=True):
406
409
be an issue in high dimensional space.
407
410
"""
408
411
check_is_fitted (self , 'x_mean_' )
412
+ X = check_array (X , copy = copy )
409
413
# Normalize
410
- if copy :
411
- Xc = (np .asarray (X ) - self .x_mean_ )
412
- else :
413
- X = np .asarray (X )
414
- Xc -= self .x_mean_
415
- Xc /= self .x_std_
416
- Ypred = np .dot (Xc , self .coef_ )
414
+ X -= self .x_mean_
415
+ X /= self .x_std_
416
+ Ypred = np .dot (X , self .coef_ )
417
417
return Ypred + self .y_mean_
418
418
419
419
def fit_transform (self , X , y = None , ** fit_params ):
@@ -724,13 +724,15 @@ def __init__(self, n_components=2, scale=True, copy=True):
724
724
def fit (self , X , Y ):
725
725
# copy since this will contain the centered data
726
726
check_consistent_length (X , Y )
727
- X = check_array (X , dtype = np .float , copy = self .copy )
728
- Y = check_array (Y , dtype = np .float , copy = self .copy )
729
-
730
- p = X . shape [ 1 ]
727
+ X = check_array (X , dtype = np .float64 , copy = self .copy )
728
+ Y = check_array (Y , dtype = np .float64 , copy = self .copy , ensure_2d = False )
729
+ if Y . ndim == 1 :
730
+ Y = Y . reshape ( - 1 , 1 )
731
731
732
- if self .n_components < 1 or self .n_components > p :
733
- raise ValueError ('invalid number of components' )
732
+ if self .n_components > max (Y .shape [1 ], X .shape [1 ]):
733
+ raise ValueError ("Invalid number of components n_components=%d with "
734
+ "X of shape %s and Y of shape %s."
735
+ % (self .n_components , str (X .shape ), str (Y .shape )))
734
736
735
737
# Scale (in place)
736
738
X , Y , self .x_mean_ , self .y_mean_ , self .x_std_ , self .y_std_ = \
@@ -742,7 +744,7 @@ def fit(self, X, Y):
742
744
# components is smaller than rank(X) - 1. Hence, if we want to extract
743
745
# all the components (C.shape[1]), we have to use another one. Else,
744
746
# let's use ARPACK to compute only the interesting components.
745
- if self .n_components == C .shape [ 1 ] :
747
+ if self .n_components >= np . min ( C .shape ) :
746
748
U , s , V = linalg .svd (C , full_matrices = False )
747
749
else :
748
750
U , s , V = arpack .svds (C , k = self .n_components )
@@ -756,9 +758,12 @@ def fit(self, X, Y):
756
758
def transform (self , X , Y = None ):
757
759
"""Apply the dimension reduction learned on the train data."""
758
760
check_is_fitted (self , 'x_mean_' )
761
+ X = check_array (X , dtype = np .float64 )
759
762
Xr = (X - self .x_mean_ ) / self .x_std_
760
763
x_scores = np .dot (Xr , self .x_weights_ )
761
764
if Y is not None :
765
+ if Y .ndim == 1 :
766
+ Y = Y .reshape (- 1 , 1 )
762
767
Yr = (Y - self .y_mean_ ) / self .y_std_
763
768
y_scores = np .dot (Yr , self .y_weights_ )
764
769
return x_scores , y_scores
0 commit comments