-
-
Notifications
You must be signed in to change notification settings - Fork 26.1k
Closed
Description
Hello,
Describe the bug
sklearn.decomposition.PCA
has what appears to be an internal function collision with matplotlib.pyplot, as the code (run with my data) below shows.
Steps/Code to Reproduce
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
DATA = pd.read_csv("DATA.csv")
#1 Calc PCA
x_pca = PCA(8).fit_transform(DATA)
#2 Set up the matplotlib figure matrix → this causes the collision if the code is run twice
fig, axes = plt.subplots(2, 2, figsize=(13, 8), sharex=False)
#NOTES:
#A] If we run only #1, the code passes any number of times.
#B] If we run #1 and #2, the code passes the first time but crashes the second time; on the third run it passes again, on the fourth run it crashes, and so on.
Expected Results
The full code passes the first time it is run (in general, on every odd-numbered run); the second time it is run (in general, on every even-numbered run) it fails as shown below:
Actual Results for even runs
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-ad8aef2aa149> in <module>
6
7 #1 Calc PCA
----> 8 x_pca = PCA(8).fit_transform(DATA)
9
10
~\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py in fit_transform(self, X, y)
374 C-ordered array, use 'np.ascontiguousarray'.
375 """
--> 376 U, S, Vt = self._fit(X)
377 U = U[:, :self.n_components_]
378
~\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py in _fit(self, X)
423 return self._fit_full(X, n_components)
424 elif self._fit_svd_solver in ['arpack', 'randomized']:
--> 425 return self._fit_truncated(X, n_components, self._fit_svd_solver)
426 else:
427 raise ValueError("Unrecognized svd_solver='{0}'"
~\anaconda3\lib\site-packages\sklearn\decomposition\_pca.py in _fit_truncated(self, X, n_components, svd_solver)
539 elif svd_solver == 'randomized':
540 # sign flipping is done inside
--> 541 U, S, Vt = randomized_svd(X, n_components=n_components,
542 n_iter=self.iterated_power,
543 flip_sign=True,
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\anaconda3\lib\site-packages\sklearn\utils\extmath.py in randomized_svd(M, n_components, n_oversamples, n_iter, power_iteration_normalizer, transpose, flip_sign, random_state)
355
356 # compute the SVD on the thin matrix: (k + p) wide
--> 357 Uhat, s, Vt = linalg.svd(B, full_matrices=False)
358
359 del B
~\anaconda3\lib\site-packages\scipy\linalg\decomp_svd.py in svd(a, full_matrices, compute_uv, overwrite_a, check_finite, lapack_driver)
104
105 """
--> 106 a1 = _asarray_validated(a, check_finite=check_finite)
107 if len(a1.shape) != 2:
108 raise ValueError('expected matrix')
~\anaconda3\lib\site-packages\scipy\_lib\_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
260 raise ValueError('masked arrays are not supported')
261 toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 262 a = toarray(a)
263 if not objects_ok:
264 if a.dtype is np.dtype('O'):
~\anaconda3\lib\site-packages\numpy\lib\function_base.py in asarray_chkfinite(a, dtype, order)
483 a = asarray(a, dtype=dtype, order=order)
484 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 485 raise ValueError(
486 "array must not contain infs or NaNs")
487 return a
ValueError: array must not contain infs or NaNs
#### Versions
pandas 1.2.1
matplotlib 3.3.3
sklearn 0.24.1
numpy 1.19.5
Metadata
Metadata
Assignees
Labels
No labels