From 64823973694495d013d558b569e15d9dc78b7a01 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Tue, 13 Feb 2024 17:02:10 +0100
Subject: [PATCH 01/16] Correct PCA documentation about sparse input

---
 sklearn/decomposition/_pca.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index d121c5e5c186f..515a234edac06 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -133,8 +133,8 @@ class PCA(_BasePCA):
     It can also use the scipy.sparse.linalg ARPACK implementation of the
     truncated SVD.
 
-    Notice that this class does not support sparse input. See
-    :class:`TruncatedSVD` for an alternative with sparse data.
+    This class now supports `scipy.sparse.sparray` and `scipy.sparse.spmatrix`
+    inputs when using the ARPACK solver.
 
     For a usage example, see
     :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`

From 22f9cd0b3e8e8de156cce315e264a066e4f1fc24 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Thu, 15 Feb 2024 09:45:03 +0100
Subject: [PATCH 02/16] Merge two paragraphs

---
 sklearn/decomposition/_pca.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 515a234edac06..bf9b909dd2814 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -131,10 +131,9 @@ class PCA(_BasePCA):
     data and the number of components to extract.
 
     It can also use the scipy.sparse.linalg ARPACK implementation of the
-    truncated SVD.
-
-    This class now supports `scipy.sparse.sparray` and `scipy.sparse.spmatrix`
-    inputs when using the ARPACK solver.
+    truncated SVD. With sparse inputs, the ARPACK implementation of the truncated 
+    SVD can be used through :func:`scipy.sparse.linalg.svds`. Alternatively, one may
+    consider :class:`TruncatedSVD` where the data are not centered.
 
     For a usage example, see
     :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`

From a5ddb352c05c511800de7ed0b4a009fb3c0be2a3 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Thu, 15 Feb 2024 10:58:22 +0100
Subject: [PATCH 03/16] Remove trailing space

---
 sklearn/decomposition/_pca.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index bf9b909dd2814..6d019faae76e3 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -131,7 +131,7 @@ class PCA(_BasePCA):
     data and the number of components to extract.
 
     It can also use the scipy.sparse.linalg ARPACK implementation of the
-    truncated SVD. With sparse inputs, the ARPACK implementation of the truncated 
+    truncated SVD. With sparse inputs, the ARPACK implementation of the truncated
     SVD can be used through :func:`scipy.sparse.linalg.svds`. Alternatively, one may
     consider :class:`TruncatedSVD` where the data are not centered.
 

From b87380db0bdec22df69098d97996174c4134e8f4 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <guillaume@probabl.ai>
Date: Mon, 19 Feb 2024 11:41:21 +0100
Subject: [PATCH 04/16] remove redundant sentence

---
 sklearn/decomposition/_pca.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 6d019faae76e3..187e3b1067bee 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -130,10 +130,9 @@ class PCA(_BasePCA):
     SVD by the method of Halko et al. 2009, depending on the shape of the input
     data and the number of components to extract.
 
-    It can also use the scipy.sparse.linalg ARPACK implementation of the
-    truncated SVD. With sparse inputs, the ARPACK implementation of the truncated
-    SVD can be used through :func:`scipy.sparse.linalg.svds`. Alternatively, one may
-    consider :class:`TruncatedSVD` where the data are not centered.
+    With sparse inputs, the ARPACK implementation of the truncated SVD can be
+    used (i.e. through :func:`scipy.sparse.linalg.svds`). Alternatively, one
+    may consider :class:`TruncatedSVD` where the data are not centered.
 
     For a usage example, see
     :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`

From 03b84b72bd52550ef682708df6c5ba6d94e763c1 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Thu, 22 Feb 2024 11:03:13 +0100
Subject: [PATCH 05/16] Select arpack, remove error raising and adapt test

---
 sklearn/decomposition/_pca.py           |  8 ++------
 sklearn/decomposition/tests/test_pca.py | 19 +++++++++++--------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 187e3b1067bee..0527b80e97ec7 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -465,13 +465,9 @@ def _fit(self, X):
         """Dispatch to the right submethod depending on the chosen solver."""
         xp, is_array_api_compliant = get_namespace(X)
 
-        # Raise an error for sparse input and unsupported svd_solver
+        # Automatically select "arpack" solver if the input is sparse
         if issparse(X) and self.svd_solver != "arpack":
-            raise TypeError(
-                'PCA only support sparse inputs with the "arpack" solver, while '
-                f'"{self.svd_solver}" was passed. See TruncatedSVD for a possible'
-                " alternative."
-            )
+            self.svd_solver = "arpack"
         # Raise an error for non-Numpy input and arpack solver.
         if self.svd_solver == "arpack" and is_array_api_compliant:
             raise ValueError(
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 44281b9038697..ffd1c8f348c1d 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -161,7 +161,9 @@ def test_pca_sparse_fit_transform(global_random_seed, sparse_container):
 
 @pytest.mark.parametrize("svd_solver", ["randomized", "full", "auto"])
 @pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS)
-def test_sparse_pca_solver_error(global_random_seed, svd_solver, sparse_container):
+def test_sparse_pca_solver_automatically_select_arpack(
+    global_random_seed, svd_solver, sparse_container
+):
     random_state = np.random.RandomState(global_random_seed)
     X = sparse_container(
         sp.sparse.random(
@@ -170,13 +172,14 @@ def test_sparse_pca_solver_error(global_random_seed, svd_solver, sparse_containe
             random_state=random_state,
         )
     )
-    pca = PCA(n_components=30, svd_solver=svd_solver)
-    error_msg_pattern = (
-        f'PCA only support sparse inputs with the "arpack" solver, while "{svd_solver}"'
-        " was passed"
-    )
-    with pytest.raises(TypeError, match=error_msg_pattern):
-        pca.fit(X)
+    pca_arpack = PCA(n_components=10, svd_solver="arpack")
+    pca_others = PCA(n_components=10, svd_solver=svd_solver)
+
+    # check the equivalence of pca_arpack.fit and pca_others.fit
+    X_arpack = pca_arpack.fit(X)
+    X_others = pca_others.fit(X)
+
+    assert_allclose(X_arpack.singular_values_, X_others.singular_values_, rtol=5e-3)
 
 
 def test_no_empty_slice_warning():

From c8768cbed9d9335bc39212a74253be59baa7d949 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Thu, 22 Feb 2024 11:52:50 +0100
Subject: [PATCH 06/16] Add changelog

---
 doc/whats_new/v1.5.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index f8588a89aeb7a..d898fbec6f24e 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -64,6 +64,13 @@ Changelog
 - |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__`
   which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_.
 
+:mod:`sklearn.decomposition`
+............................
+
+- |Enhancement| :class:`decomposition.PCA` now automatically select `arpack` solver
+  for sparse inputs and does not raise error if other solvers are passed.
+  :pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.
+
 :mod:`sklearn.dummy`
 ....................
 

From 6701cc4705a2af78c1d2814039125a901f5dd504 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Fri, 23 Feb 2024 10:01:58 +0100
Subject: [PATCH 07/16] Force arpack only when auto is chosen, raise TypeError
 otherwise

---
 sklearn/decomposition/_pca.py           | 14 ++++++++---
 sklearn/decomposition/tests/test_pca.py | 33 +++++++++++++++++++------
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 0527b80e97ec7..6ee6336527bcb 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -465,9 +465,13 @@ def _fit(self, X):
         """Dispatch to the right submethod depending on the chosen solver."""
         xp, is_array_api_compliant = get_namespace(X)
 
-        # Automatically select "arpack" solver if the input is sparse
-        if issparse(X) and self.svd_solver != "arpack":
-            self.svd_solver = "arpack"
+        # Raise an error for sparse input and unsupported svd_solver
+        if issparse(X) and self.svd_solver not in {"arpack", "auto"}:
+            raise TypeError(
+                'PCA only support sparse inputs with the "arpack" solver, while '
+                f'"{self.svd_solver}" was passed. See TruncatedSVD for a possible'
+                " alternative."
+            )
         # Raise an error for non-Numpy input and arpack solver.
         if self.svd_solver == "arpack" and is_array_api_compliant:
             raise ValueError(
@@ -495,7 +499,9 @@ def _fit(self, X):
         self._fit_svd_solver = self.svd_solver
         if self._fit_svd_solver == "auto":
             # Small problem or n_components == 'mle', just call full PCA
-            if max(X.shape) <= 500 or n_components == "mle":
+            if issparse(X):
+                self._fit_svd_solver = "arpack"
+            elif max(X.shape) <= 500 or n_components == "mle":
                 self._fit_svd_solver = "full"
             elif 1 <= n_components < 0.8 * min(X.shape):
                 self._fit_svd_solver = "randomized"
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index ffd1c8f348c1d..1fab1dd92f463 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -159,10 +159,29 @@ def test_pca_sparse_fit_transform(global_random_seed, sparse_container):
     assert_allclose(pca_fit.transform(X2), pca_fit_transform.transform(X2), rtol=2e-9)
 
 
-@pytest.mark.parametrize("svd_solver", ["randomized", "full", "auto"])
+@pytest.mark.parametrize("svd_solver", ["randomized", "full"])
 @pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS)
-def test_sparse_pca_solver_automatically_select_arpack(
-    global_random_seed, svd_solver, sparse_container
+def test_sparse_pca_solver_error(global_random_seed, svd_solver, sparse_container):
+    random_state = np.random.RandomState(global_random_seed)
+    X = sparse_container(
+        sp.sparse.random(
+            SPARSE_M,
+            SPARSE_N,
+            random_state=random_state,
+        )
+    )
+    pca = PCA(n_components=30, svd_solver=svd_solver)
+    error_msg_pattern = (
+        f'PCA only support sparse inputs with the "arpack" solver, while "{svd_solver}"'
+        " was passed"
+    )
+    with pytest.raises(TypeError, match=error_msg_pattern):
+        pca.fit(X)
+
+
+@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS)
+def test_sparse_pca_auto_arpack_singluar_values_consistency(
+    global_random_seed, sparse_container
 ):
     random_state = np.random.RandomState(global_random_seed)
     X = sparse_container(
@@ -173,13 +192,13 @@ def test_sparse_pca_solver_automatically_select_arpack(
         )
     )
     pca_arpack = PCA(n_components=10, svd_solver="arpack")
-    pca_others = PCA(n_components=10, svd_solver=svd_solver)
+    pca_auto = PCA(n_components=10, svd_solver="auto")
 
-    # check the equivalence of pca_arpack.fit and pca_others.fit
+    # check the equivalence of pca_arpack.fit and pca_auto.fit
     X_arpack = pca_arpack.fit(X)
-    X_others = pca_others.fit(X)
+    X_auto = pca_auto.fit(X)
 
-    assert_allclose(X_arpack.singular_values_, X_others.singular_values_, rtol=5e-3)
+    assert_allclose(X_arpack.singular_values_, X_auto.singular_values_, rtol=5e-3)
 
 
 def test_no_empty_slice_warning():

From f35b5d13f5262b1b9eda4915fd4649f95c124e20 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Fri, 23 Feb 2024 11:18:42 +0100
Subject: [PATCH 08/16] Fix error when n_components is unspecified, solver is
 auto and X sparse

---
 sklearn/decomposition/_pca.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 6ee6336527bcb..0d95de9c0b200 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -498,9 +498,13 @@ def _fit(self, X):
         # Handle svd_solver
         self._fit_svd_solver = self.svd_solver
         if self._fit_svd_solver == "auto":
-            # Small problem or n_components == 'mle', just call full PCA
+            # Automatically select arpack for sparse inputs
+            # Reduce n_components by 1 when passing from auto to arpack
             if issparse(X):
                 self._fit_svd_solver = "arpack"
+                if self.n_components is None:
+                    n_components = n_components - 1
+            # Small problem or n_components == 'mle', just call full PCA
             elif max(X.shape) <= 500 or n_components == "mle":
                 self._fit_svd_solver = "full"
             elif 1 <= n_components < 0.8 * min(X.shape):

From 0c7e3a3f4ec4b6fa90e811df92f19c20ddb77c1a Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Sun, 25 Feb 2024 11:06:40 +0100
Subject: [PATCH 09/16] Apply comments

---
 doc/whats_new/v1.5.rst                  |  9 +++++----
 sklearn/decomposition/_pca.py           |  2 +-
 sklearn/decomposition/tests/test_pca.py | 12 ++++--------
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index d898fbec6f24e..93e562ad4d0a7 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -66,10 +66,11 @@ Changelog
 
 :mod:`sklearn.decomposition`
 ............................
-
-- |Enhancement| :class:`decomposition.PCA` now automatically select `arpack` solver
-  for sparse inputs and does not raise error if other solvers are passed.
-  :pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.
+- |ENH| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
+  only `inverse_func` is provided without `func` (that would default to identity) being
+  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
+- |Enhancement| :class:`~decomposition.PCA` now automatically selects the ARPACK solver
+  for sparse inputs when `svd_solver="auto"` instead of raising an error.
 
 :mod:`sklearn.dummy`
 ....................
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 0d95de9c0b200..d767a6df40068 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -503,7 +503,7 @@ def _fit(self, X):
             if issparse(X):
                 self._fit_svd_solver = "arpack"
                 if self.n_components is None:
-                    n_components = n_components - 1
+                    n_components -= 1
             # Small problem or n_components == 'mle', just call full PCA
             elif max(X.shape) <= 500 or n_components == "mle":
                 self._fit_svd_solver = "full"
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 1fab1dd92f463..1ec359a028f8c 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -183,6 +183,7 @@ def test_sparse_pca_solver_error(global_random_seed, svd_solver, sparse_containe
 def test_sparse_pca_auto_arpack_singluar_values_consistency(
     global_random_seed, sparse_container
 ):
+    """Check that "auto" and "arpack" solvers are equivalent for sparse inputs."""
     random_state = np.random.RandomState(global_random_seed)
     X = sparse_container(
         sp.sparse.random(
@@ -191,14 +192,9 @@ def test_sparse_pca_auto_arpack_singluar_values_consistency(
             random_state=random_state,
         )
     )
-    pca_arpack = PCA(n_components=10, svd_solver="arpack")
-    pca_auto = PCA(n_components=10, svd_solver="auto")
-
-    # check the equivalence of pca_arpack.fit and pca_auto.fit
-    X_arpack = pca_arpack.fit(X)
-    X_auto = pca_auto.fit(X)
-
-    assert_allclose(X_arpack.singular_values_, X_auto.singular_values_, rtol=5e-3)
+    pca_arpack = PCA(n_components=10, svd_solver="arpack").fit(X)
+    pca_auto = PCA(n_components=10, svd_solver="auto").fit(X)
+    assert_allclose(pca_arpack.singular_values_, pca_auto.singular_values_, rtol=5e-3)
 
 
 def test_no_empty_slice_warning():

From 4fc932ed2e399504dbfa1186f4bb9e53e3747e25 Mon Sep 17 00:00:00 2001
From: Thanh Lam DANG <70220760+lamdang2k@users.noreply.github.com>
Date: Sun, 25 Feb 2024 11:23:10 +0100
Subject: [PATCH 10/16] Update v1.5.rst

---
 doc/whats_new/v1.5.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 35f5547f4ff51..38690a2b65c9d 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -74,14 +74,16 @@ Changelog
 - |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__`
   which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_.
 
-
 :mod:`sklearn.decomposition`
 ............................
+
 - |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
   only `inverse_func` is provided without `func` (that would default to identity) being
   explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
+
 - |Enhancement| :class:`~decomposition.PCA` now automatically selects the ARPACK solver
   for sparse inputs when `svd_solver="auto"` instead of raising an error.
+  :pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.
 
 :mod:`sklearn.dummy`
 ....................

From 9156ad1768e3d72aefca70a9ebd8ad59fe3de8f6 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Mon, 26 Feb 2024 20:32:07 +0100
Subject: [PATCH 11/16] Fix doc error

---
 doc/whats_new/v1.5.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 7634b2787fcab..267dc56fc0efd 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -97,12 +97,12 @@ Changelog
 
 - |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
   only `inverse_func` is provided without `func` (that would default to identity) being
-  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>
-  
+  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
+
 - |Enhancement| :class:`~decomposition.PCA` now automatically selects the ARPACK solver
   for sparse inputs when `svd_solver="auto"` instead of raising an error.
   :pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.
-  
+
 :mod:`sklearn.datasets`
 .......................
 
@@ -173,7 +173,7 @@ Changelog
   :class:`linear_model.Lasso` and :class:`linear_model.LassoCV` now explicitly don't
   accept large sparse data formats. :pr:`27576` by :user:`Stefanie Senger
   <StefanieSenger>`.
-  
+
 - |API| :class:`linear_model.RidgeCV` and :class:`linear_model.RidgeClassifierCV`
   will now allow `alpha=0` when `cv != None`, which is consistent with
   :class:`linear_model.Ridge` and :class:`linear_model.RidgeClassifier`.

From b99d202d334ab303a8e6e9127e9414222e063e91 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Tue, 27 Feb 2024 17:49:55 +0100
Subject: [PATCH 12/16] Fix order in changelog

---
 doc/whats_new/v1.5.rst | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 267dc56fc0efd..08ce388a6f8ff 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -92,17 +92,6 @@ Changelog
 - |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__`
   which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_.
 
-:mod:`sklearn.decomposition`
-............................
-
-- |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
-  only `inverse_func` is provided without `func` (that would default to identity) being
-  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
-
-- |Enhancement| :class:`~decomposition.PCA` now automatically selects the ARPACK solver
-  for sparse inputs when `svd_solver="auto"` instead of raising an error.
-  :pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.
-
 :mod:`sklearn.datasets`
 .......................
 
@@ -120,6 +109,17 @@ Changelog
   By default, the functions will retry up to 3 times in case of network failures.
   :pr:`28160` by :user:`Zhehao Liu <MaxwellLZH>` and :user:`Filip Karlo Došilović <fkdosilovic>`.
 
+:mod:`sklearn.decomposition`
+............................
+
+- |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
+  only `inverse_func` is provided without `func` (that would default to identity) being
+  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
+
+- |Enhancement| :class:`~decomposition.PCA` now automatically selects the ARPACK solver
+  for sparse inputs when `svd_solver="auto"` instead of raising an error.
+  :pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.
+
 :mod:`sklearn.dummy`
 ....................
 

From aeb42a48e5a9dd6c8594fc20998a00389abe0b39 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Wed, 28 Feb 2024 19:18:49 +0100
Subject: [PATCH 13/16] Refactor code and fix module name in changelog

---
 doc/whats_new/v1.5.rst        |  7 +++++--
 sklearn/decomposition/_pca.py | 11 ++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 08ce388a6f8ff..b8f407117a2d7 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -109,13 +109,16 @@ Changelog
   By default, the functions will retry up to 3 times in case of network failures.
   :pr:`28160` by :user:`Zhehao Liu <MaxwellLZH>` and :user:`Filip Karlo Došilović <fkdosilovic>`.
 
-:mod:`sklearn.decomposition`
-............................
+:mod:`sklearn.compose`
+......................
 
 - |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
   only `inverse_func` is provided without `func` (that would default to identity) being
   explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
 
+:mod:`sklearn.decomposition`
+............................
+
 - |Enhancement| :class:`~decomposition.PCA` now automatically selects the ARPACK solver
   for sparse inputs when `svd_solver="auto"` instead of raising an error.
   :pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index d767a6df40068..30a5fc13d8959 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -485,6 +485,9 @@ def _fit(self, X):
             ensure_2d=True,
             copy=self.copy,
         )
+        # Handle sparse input, automatically select arpack solver
+        if self.svd_solver == "auto" and issparse(X):
+            self.svd_solver = "arpack"
 
         # Handle n_components==None
         if self.n_components is None:
@@ -498,14 +501,8 @@ def _fit(self, X):
         # Handle svd_solver
         self._fit_svd_solver = self.svd_solver
         if self._fit_svd_solver == "auto":
-            # Automatically select arpack for sparse inputs
-            # Reduce n_components by 1 when passing from auto to arpack
-            if issparse(X):
-                self._fit_svd_solver = "arpack"
-                if self.n_components is None:
-                    n_components -= 1
             # Small problem or n_components == 'mle', just call full PCA
-            elif max(X.shape) <= 500 or n_components == "mle":
+            if max(X.shape) <= 500 or n_components == "mle":
                 self._fit_svd_solver = "full"
             elif 1 <= n_components < 0.8 * min(X.shape):
                 self._fit_svd_solver = "randomized"

From 0bb9273a8eb5c642448751e5071655783b47a998 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Wed, 28 Feb 2024 19:21:07 +0100
Subject: [PATCH 14/16] Remove redundant compose

---
 doc/whats_new/v1.5.rst | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index b8f407117a2d7..1128bb1f2338b 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -92,6 +92,10 @@ Changelog
 - |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__`
   which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_.
 
+- |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
+  only `inverse_func` is provided without `func` (that would default to identity) being
+  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
+
 :mod:`sklearn.datasets`
 .......................
 
@@ -109,13 +113,6 @@ Changelog
   By default, the functions will retry up to 3 times in case of network failures.
   :pr:`28160` by :user:`Zhehao Liu <MaxwellLZH>` and :user:`Filip Karlo Došilović <fkdosilovic>`.
 
-:mod:`sklearn.compose`
-......................
-
-- |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
-  only `inverse_func` is provided without `func` (that would default to identity) being
-  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
-
 :mod:`sklearn.decomposition`
 ............................
 

From a1af4242e2b4553f794d2137d248afac615625f2 Mon Sep 17 00:00:00 2001
From: tdang2k <lamdan0008@gmail.com>
Date: Wed, 28 Feb 2024 20:02:37 +0100
Subject: [PATCH 15/16] Do not modify svd_solver

---
 sklearn/decomposition/_pca.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 30a5fc13d8959..ba8cdb992c899 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -486,12 +486,13 @@ def _fit(self, X):
             copy=self.copy,
         )
         # Handle sparse input, automatically select arpack solver
-        if self.svd_solver == "auto" and issparse(X):
-            self.svd_solver = "arpack"
+        self._fit_svd_solver = self.svd_solver
+        if self._fit_svd_solver == "auto" and issparse(X):
+            self._fit_svd_solver = "arpack"
 
         # Handle n_components==None
         if self.n_components is None:
-            if self.svd_solver != "arpack":
+            if self._fit_svd_solver != "arpack":
                 n_components = min(X.shape)
             else:
                 n_components = min(X.shape) - 1
@@ -499,7 +500,6 @@ def _fit(self, X):
             n_components = self.n_components
 
         # Handle svd_solver
-        self._fit_svd_solver = self.svd_solver
         if self._fit_svd_solver == "auto":
             # Small problem or n_components == 'mle', just call full PCA
             if max(X.shape) <= 500 or n_components == "mle":

From 69df867280c63fc54071044135240db17804e770 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <guillaume@probabl.ai>
Date: Thu, 29 Feb 2024 15:45:24 +0100
Subject: [PATCH 16/16] cosmetic

---
 doc/whats_new/v1.5.rst        | 7 ++++---
 sklearn/decomposition/_pca.py | 8 +-------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 1128bb1f2338b..fc3c2337fc4e1 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -92,9 +92,10 @@ Changelog
 - |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__`
   which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_.
 
-- |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
-  only `inverse_func` is provided without `func` (that would default to identity) being
-  explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
+- |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit`
+  if only `inverse_func` is provided without `func` (that would default to identity)
+  being explicitly set as well.
+  :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
 
 :mod:`sklearn.datasets`
 .......................
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index ba8cdb992c899..abd2fda2d5d2f 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -465,14 +465,12 @@ def _fit(self, X):
         """Dispatch to the right submethod depending on the chosen solver."""
         xp, is_array_api_compliant = get_namespace(X)
 
-        # Raise an error for sparse input and unsupported svd_solver
         if issparse(X) and self.svd_solver not in {"arpack", "auto"}:
             raise TypeError(
                 'PCA only support sparse inputs with the "arpack" solver, while '
                 f'"{self.svd_solver}" was passed. See TruncatedSVD for a possible'
                 " alternative."
             )
-        # Raise an error for non-Numpy input and arpack solver.
         if self.svd_solver == "arpack" and is_array_api_compliant:
             raise ValueError(
                 "PCA with svd_solver='arpack' is not supported for Array API inputs."
@@ -485,12 +483,10 @@ def _fit(self, X):
             ensure_2d=True,
             copy=self.copy,
         )
-        # Handle sparse input, automatically select arpack solver
         self._fit_svd_solver = self.svd_solver
         if self._fit_svd_solver == "auto" and issparse(X):
             self._fit_svd_solver = "arpack"
 
-        # Handle n_components==None
         if self.n_components is None:
             if self._fit_svd_solver != "arpack":
                 n_components = min(X.shape)
@@ -499,18 +495,16 @@ def _fit(self, X):
         else:
             n_components = self.n_components
 
-        # Handle svd_solver
         if self._fit_svd_solver == "auto":
             # Small problem or n_components == 'mle', just call full PCA
             if max(X.shape) <= 500 or n_components == "mle":
                 self._fit_svd_solver = "full"
             elif 1 <= n_components < 0.8 * min(X.shape):
                 self._fit_svd_solver = "randomized"
-            # This is also the case of n_components in (0,1)
+            # This is also the case of n_components in (0, 1)
             else:
                 self._fit_svd_solver = "full"
 
-        # Call different fits for either full or truncated SVD
         if self._fit_svd_solver == "full":
             return self._fit_full(X, n_components)
         elif self._fit_svd_solver in ["arpack", "randomized"]: