From deefdd85bf5bc3c40556c69dd10a7695d87d275b Mon Sep 17 00:00:00 2001 From: Wally Date: Thu, 23 Feb 2017 17:03:28 +0000 Subject: [PATCH 1/3] my test script --- sklearn/decomposition/test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 sklearn/decomposition/test.py diff --git a/sklearn/decomposition/test.py b/sklearn/decomposition/test.py new file mode 100644 index 0000000000000..180d5f11ca13b --- /dev/null +++ b/sklearn/decomposition/test.py @@ -0,0 +1,14 @@ +import numpy as np +from .pca import PCA +import pandas as pd + +X = np.array([[-1, -1,3,4,-1, -1,3,4], [-2, -1,5,-1, -1,3,4,2], [-3, -2,1,-1, -1,3,4,1], +[1, 1,4,-1, -1,3,4,2], [2, 1,0,-1, -1,3,4,2], [3, 2,10,-1, -1,3,4,10]]) + +ipca = PCA(n_components = 7, svd_solver= "arpack") + +ipca.fit(X) +result = ipca.transform(X) + +print result.shape +print ipca.n_components_ From a903ef5ff5d9f27cba001108ca708997ef9b1120 Mon Sep 17 00:00:00 2001 From: Wally Date: Tue, 28 Feb 2017 14:08:56 +0000 Subject: [PATCH 2/3] r. 
commit --- sklearn/decomposition/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/decomposition/test.py b/sklearn/decomposition/test.py index 180d5f11ca13b..4c255517e799e 100644 --- a/sklearn/decomposition/test.py +++ b/sklearn/decomposition/test.py @@ -1,3 +1,5 @@ +#Script to test PCA + import numpy as np from .pca import PCA import pandas as pd From 7f18df2ab4634f35394c3e79668bab9e4417b7e1 Mon Sep 17 00:00:00 2001 From: Wally Date: Wed, 1 Mar 2017 20:02:34 +0000 Subject: [PATCH 3/3] introduced pca_modifications and deleted test.py file --- sklearn/decomposition/pca.py | 42 ++++++++++++++++++++++------------- sklearn/decomposition/test.py | 16 ------------- 2 files changed, 26 insertions(+), 32 deletions(-) delete mode 100644 sklearn/decomposition/test.py diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index a3abaa6217df8..3d5018d118102 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -134,8 +134,11 @@ class PCA(_BasePCA): to guess the dimension if ``0 < n_components < 1`` and svd_solver == 'full', select the number of components such that the amount of variance that needs to be - explained is greater than the percentage specified by n_components - n_components cannot be equal to n_features for svd_solver == 'arpack'. + explained is greater than the percentage specified by n_components. + if svd_solver == 'arpack', the number of components must be strictly + less than the minimum of n_features and n_samples: + + n_components < min(n_samples, n_features) copy : bool (default True) If False, data passed to fit are overwritten and running @@ -166,7 +169,7 @@ class PCA(_BasePCA): arpack : run SVD truncated to n_components calling ARPACK solver via `scipy.sparse.linalg.svds`. It requires strictly - 0 < n_components < X.shape[1] + 0 < n_components < min(X.shape) randomized : run randomized SVD by the method of Halko et al. 
@@ -205,7 +208,7 @@ class PCA(_BasePCA): Percentage of variance explained by each of the selected components. If ``n_components`` is not set then all components are stored and the - sum of explained variances is equal to 1.0. + sum of the ratios is equal to 1.0. singular_values_ : array, shape (n_components,) The singular values corresponding to each of the selected components. @@ -221,7 +224,8 @@ class PCA(_BasePCA): The estimated number of components. When n_components is set to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this number is estimated from input data. Otherwise it equals the parameter - n_components, or n_features if n_components is None. + n_components, or the lesser value of n_features and n_samples + if n_components is None. noise_variance_ : float The estimated noise covariance following the Probabilistic PCA model @@ -365,7 +369,7 @@ def _fit(self, X): # Handle n_components==None if self.n_components is None: - n_components = X.shape[1] + n_components = min(X.shape) else: n_components = self.n_components @@ -395,10 +399,11 @@ def _fit_full(self, X, n_components): if n_samples < n_features: raise ValueError("n_components='mle' is only supported " "if n_samples >= n_features") - elif not 0 <= n_components <= n_features: + elif not 0 <= n_components <= min(n_samples, n_features): raise ValueError("n_components=%r must be between 0 and " - "n_features=%r with svd_solver='full'" - % (n_components, n_features)) + "min(n_samples, n_features)=%r with " + "svd_solver='full'" + % (n_components, min(n_samples, n_features))) # Center data self.mean_ = np.mean(X, axis=0) @@ -453,14 +458,19 @@ def _fit_truncated(self, X, n_components, svd_solver): raise ValueError("n_components=%r cannot be a string " "with svd_solver='%s'" % (n_components, svd_solver)) - elif not 1 <= n_components <= n_features: + elif not 1 <= n_components <= min(n_samples, n_features): raise ValueError("n_components=%r must be between 1 and " - "n_features=%r with 
svd_solver='%s'" - % (n_components, n_features, svd_solver)) - elif svd_solver == 'arpack' and n_components == n_features: + "min(n_samples, n_features)=%r with " + "svd_solver='%s'" + % (n_components, min(n_samples, n_features), + svd_solver)) + elif svd_solver == 'arpack' and n_components == min(n_samples, + n_features): raise ValueError("n_components=%r must be stricly less than " - "n_features=%r with svd_solver='%s'" - % (n_components, n_features, svd_solver)) + "min(n_samples, n_features)=%r with " + "svd_solver='%s'" + % (n_components, min(n_samples, n_features), + svd_solver)) random_state = check_random_state(self.random_state) @@ -495,7 +505,7 @@ def _fit_truncated(self, X, n_components, svd_solver): self.explained_variance_ratio_ = \ self.explained_variance_ / total_var.sum() self.singular_values_ = S.copy() # Store the singular values. - if self.n_components_ < n_features: + if self.n_components_ < min(n_samples, n_features): self.noise_variance_ = (total_var.sum() - self.explained_variance_.sum()) else: diff --git a/sklearn/decomposition/test.py b/sklearn/decomposition/test.py deleted file mode 100644 index 4c255517e799e..0000000000000 --- a/sklearn/decomposition/test.py +++ /dev/null @@ -1,16 +0,0 @@ -#Script to test PCA - -import numpy as np -from .pca import PCA -import pandas as pd - -X = np.array([[-1, -1,3,4,-1, -1,3,4], [-2, -1,5,-1, -1,3,4,2], [-3, -2,1,-1, -1,3,4,1], -[1, 1,4,-1, -1,3,4,2], [2, 1,0,-1, -1,3,4,2], [3, 2,10,-1, -1,3,4,10]]) - -ipca = PCA(n_components = 7, svd_solver= "arpack") - -ipca.fit(X) -result = ipca.transform(X) - -print result.shape -print ipca.n_components_