Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions doc/whats_new/v1.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,10 @@ Changelog
- |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__`
which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_.

- |ENH| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if
only `inverse_func` is provided without `func` (that would default to identity) being
explicitly set as well. :pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.
- |Enhancement| :class:`compose.TransformedTargetRegressor` now raises an error in `fit`
if only `inverse_func` is provided without `func` (that would default to identity)
being explicitly set as well.
:pr:`28483` by :user:`Stefanie Senger <StefanieSenger>`.

:mod:`sklearn.datasets`
.......................
Expand All @@ -113,6 +114,13 @@ Changelog
By default, the functions will retry up to 3 times in case of network failures.
:pr:`28160` by :user:`Zhehao Liu <MaxwellLZH>` and :user:`Filip Karlo Došilović <fkdosilovic>`.

:mod:`sklearn.decomposition`
............................

- |Enhancement| :class:`~decomposition.PCA` now automatically selects the ARPACK solver
for sparse inputs when `svd_solver="auto"` instead of raising an error.
:pr:`28498` by :user:`Thanh Lam Dang <lamdang2k>`.

:mod:`sklearn.dummy`
....................

Expand Down Expand Up @@ -166,7 +174,7 @@ Changelog
:class:`linear_model.Lasso` and :class:`linear_model.LassoCV` now explicitly don't
accept large sparse data formats. :pr:`27576` by :user:`Stefanie Senger
<StefanieSenger>`.

- |API| :class:`linear_model.RidgeCV` and :class:`linear_model.RidgeClassifierCV`
will now allow `alpha=0` when `cv != None`, which is consistent with
:class:`linear_model.Ridge` and :class:`linear_model.RidgeClassifier`.
Expand Down
15 changes: 6 additions & 9 deletions sklearn/decomposition/_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,14 +465,12 @@ def _fit(self, X):
"""Dispatch to the right submethod depending on the chosen solver."""
xp, is_array_api_compliant = get_namespace(X)

# Raise an error for sparse input and unsupported svd_solver
if issparse(X) and self.svd_solver != "arpack":
if issparse(X) and self.svd_solver not in {"arpack", "auto"}:
raise TypeError(
'PCA only support sparse inputs with the "arpack" solver, while '
f'"{self.svd_solver}" was passed. See TruncatedSVD for a possible'
" alternative."
)
# Raise an error for non-Numpy input and arpack solver.
if self.svd_solver == "arpack" and is_array_api_compliant:
raise ValueError(
"PCA with svd_solver='arpack' is not supported for Array API inputs."
Expand All @@ -485,29 +483,28 @@ def _fit(self, X):
ensure_2d=True,
copy=self.copy,
)
self._fit_svd_solver = self.svd_solver
if self._fit_svd_solver == "auto" and issparse(X):
self._fit_svd_solver = "arpack"

# Handle n_components==None
if self.n_components is None:
if self.svd_solver != "arpack":
if self._fit_svd_solver != "arpack":
n_components = min(X.shape)
else:
n_components = min(X.shape) - 1
else:
n_components = self.n_components

# Handle svd_solver
self._fit_svd_solver = self.svd_solver
if self._fit_svd_solver == "auto":
# Small problem or n_components == 'mle', just call full PCA
if max(X.shape) <= 500 or n_components == "mle":
self._fit_svd_solver = "full"
elif 1 <= n_components < 0.8 * min(X.shape):
self._fit_svd_solver = "randomized"
# This is also the case of n_components in (0,1)
# This is also the case of n_components in (0, 1)
else:
self._fit_svd_solver = "full"

# Call different fits for either full or truncated SVD
if self._fit_svd_solver == "full":
return self._fit_full(X, n_components)
elif self._fit_svd_solver in ["arpack", "randomized"]:
Expand Down
20 changes: 19 additions & 1 deletion sklearn/decomposition/tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_pca_sparse_fit_transform(global_random_seed, sparse_container):
assert_allclose(pca_fit.transform(X2), pca_fit_transform.transform(X2), rtol=2e-9)


@pytest.mark.parametrize("svd_solver", ["randomized", "full", "auto"])
@pytest.mark.parametrize("svd_solver", ["randomized", "full"])
@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS)
def test_sparse_pca_solver_error(global_random_seed, svd_solver, sparse_container):
random_state = np.random.RandomState(global_random_seed)
Expand All @@ -179,6 +179,24 @@ def test_sparse_pca_solver_error(global_random_seed, svd_solver, sparse_containe
pca.fit(X)


@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS)
def test_sparse_pca_auto_arpack_singluar_values_consistency(
    global_random_seed, sparse_container
):
    """Check that "auto" and "arpack" solvers are equivalent for sparse inputs.

    With ``svd_solver="auto"`` a sparse ``X`` should be dispatched to the
    ARPACK solver, so the fitted singular values must match an explicit
    ``svd_solver="arpack"`` fit on the same data.
    """
    rng = np.random.RandomState(global_random_seed)
    X = sparse_container(sp.sparse.random(SPARSE_M, SPARSE_N, random_state=rng))

    # Fit once per solver and compare the resulting spectra.
    spectra = [
        PCA(n_components=10, svd_solver=solver).fit(X).singular_values_
        for solver in ("arpack", "auto")
    ]
    assert_allclose(spectra[0], spectra[1], rtol=5e-3)


def test_no_empty_slice_warning():
# test if we avoid numpy warnings for computing over empty arrays
n_components = 10
Expand Down