diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
index 542636b1642f7..208c950c6e43d 100644
--- a/doc/whats_new/v1.1.rst
+++ b/doc/whats_new/v1.1.rst
@@ -162,6 +162,15 @@ Changelog
   ndarray with `np.nan` when passed a `Float32` or `Float64` pandas extension
   array with `pd.NA`. :pr:`21278` by `Thomas Fan`_.
 
+:mod:`sklearn.random_projection`
+................................
+
+- |API| Adds :term:`get_feature_names_out` to all transformers in the
+  :mod:`~sklearn.random_projection` module:
+  :class:`~sklearn.random_projection.GaussianRandomProjection` and
+  :class:`~sklearn.random_projection.SparseRandomProjection`. :pr:`21330` by
+  :user:`Loïc Estève <lesteve>`.
+
 Code and Documentation Contributors
 -----------------------------------
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py
index 6b2c9217713e0..3ddbecb677710 100644
--- a/sklearn/random_projection.py
+++ b/sklearn/random_projection.py
@@ -34,6 +34,7 @@
 import scipy.sparse as sp
 
 from .base import BaseEstimator, TransformerMixin
+from .base import _ClassNamePrefixFeaturesOutMixin
 from .utils import check_random_state
 from .utils.extmath import safe_sparse_dot
@@ -290,7 +291,9 @@ def _sparse_random_matrix(n_components, n_features, density="auto", random_state
         return np.sqrt(1 / density) / np.sqrt(n_components) * components
 
 
-class BaseRandomProjection(TransformerMixin, BaseEstimator, metaclass=ABCMeta):
+class BaseRandomProjection(
+    TransformerMixin, BaseEstimator, _ClassNamePrefixFeaturesOutMixin, metaclass=ABCMeta
+):
     """Base class for random projections.
 
     Warning: This class should not be used directly.
@@ -420,6 +423,14 @@ def transform(self, X):
         X_new = safe_sparse_dot(X, self.components_.T, dense_output=self.dense_output)
         return X_new
 
+    @property
+    def _n_features_out(self):
+        """Number of transformed output features.
+
+        Used by _ClassNamePrefixFeaturesOutMixin.get_feature_names_out.
+        """
+        return self.n_components
+
 
 class GaussianRandomProjection(BaseRandomProjection):
     """Reduce dimensionality through Gaussian random projection.
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index a476ba7dc8da5..c3eade24be412 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -371,7 +371,6 @@ def test_pandas_column_name_consistency(estimator):
     "manifold",
     "neighbors",
     "neural_network",
-    "random_projection",
 ]
diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py
index 5866fde29d73b..1e894d906a3ad 100644
--- a/sklearn/tests/test_random_projection.py
+++ b/sklearn/tests/test_random_projection.py
@@ -24,7 +24,7 @@
 all_SparseRandomProjection: List[Any] = [SparseRandomProjection]
 all_DenseRandomProjection: List[Any] = [GaussianRandomProjection]
-all_RandomProjection = set(all_SparseRandomProjection + all_DenseRandomProjection)
+all_RandomProjection = all_SparseRandomProjection + all_DenseRandomProjection
 
 
 # Make some random data with uniformly located non zero entries with
@@ -359,3 +359,17 @@ def test_johnson_lindenstrauss_min_dim():
     Regression test for #17111: before #19374, 32-bit systems would fail.
     """
     assert johnson_lindenstrauss_min_dim(100, eps=1e-5) == 368416070986
+
+
+@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
+def test_random_projection_feature_names_out(random_projection_cls):
+    random_projection = random_projection_cls(n_components=2)
+    random_projection.fit(data)
+    names_out = random_projection.get_feature_names_out()
+    class_name_lower = random_projection_cls.__name__.lower()
+    expected_names_out = np.array(
+        [f"{class_name_lower}{i}" for i in range(random_projection.n_components_)],
+        dtype=object,
+    )
+
+    assert_array_equal(names_out, expected_names_out)