Skip to content

ENH Add get_feature_names_out for random_projection module #21330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/whats_new/v1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,15 @@ Changelog
ndarray with `np.nan` when passed a `Float32` or `Float64` pandas extension
array with `pd.NA`. :pr:`21278` by `Thomas Fan`_.

:mod:`sklearn.random_projection`
................................

- |API| Adds :term:`get_feature_names_out` to all transformers in the
:mod:`~sklearn.random_projection` module:
:class:`~sklearn.random_projection.GaussianRandomProjection` and
:class:`~sklearn.random_projection.SparseRandomProjection`. :pr:`21330` by
:user:`Loïc Estève <lesteve>`.

Code and Documentation Contributors
-----------------------------------

Expand Down
13 changes: 12 additions & 1 deletion sklearn/random_projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import scipy.sparse as sp

from .base import BaseEstimator, TransformerMixin
from .base import _ClassNamePrefixFeaturesOutMixin

from .utils import check_random_state
from .utils.extmath import safe_sparse_dot
Expand Down Expand Up @@ -290,7 +291,9 @@ def _sparse_random_matrix(n_components, n_features, density="auto", random_state
return np.sqrt(1 / density) / np.sqrt(n_components) * components


class BaseRandomProjection(TransformerMixin, BaseEstimator, metaclass=ABCMeta):
class BaseRandomProjection(
TransformerMixin, BaseEstimator, _ClassNamePrefixFeaturesOutMixin, metaclass=ABCMeta
):
"""Base class for random projections.

Warning: This class should not be used directly.
Expand Down Expand Up @@ -420,6 +423,14 @@ def transform(self, X):
X_new = safe_sparse_dot(X, self.components_.T, dense_output=self.dense_output)
return X_new

@property
def _n_features_out(self):
    """Number of transformed output features.

    Used by _ClassNamePrefixFeaturesOutMixin.get_feature_names_out.

    Returns the fitted ``n_components_`` rather than the ``n_components``
    constructor parameter: the parameter may be the string ``"auto"``, in
    which case the actual output dimensionality is only known after
    ``fit``. Reading the fitted attribute also means an unfitted estimator
    raises ``AttributeError`` here instead of reporting a size it has not
    computed yet.
    """
    return self.n_components_


class GaussianRandomProjection(BaseRandomProjection):
"""Reduce dimensionality through Gaussian random projection.
Expand Down
1 change: 0 additions & 1 deletion sklearn/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,6 @@ def test_pandas_column_name_consistency(estimator):
"manifold",
"neighbors",
"neural_network",
"random_projection",
]


Expand Down
16 changes: 15 additions & 1 deletion sklearn/tests/test_random_projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

all_SparseRandomProjection: List[Any] = [SparseRandomProjection]
all_DenseRandomProjection: List[Any] = [GaussianRandomProjection]
all_RandomProjection = set(all_SparseRandomProjection + all_DenseRandomProjection)
all_RandomProjection = all_SparseRandomProjection + all_DenseRandomProjection
Copy link
Member Author

@lesteve lesteve Nov 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

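This was done to avoid a pytest-xdist error about workers collecting different tests (different order): a set has no guaranteed iteration order, so parametrizing over it can yield a different test order in each worker process.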

It does not seem that crucial that all_RandomProjection is a set in this file.



# Make some random data with uniformly located non zero entries with
Expand Down Expand Up @@ -359,3 +359,17 @@ def test_johnson_lindenstrauss_min_dim():
Regression test for #17111: before #19374, 32-bit systems would fail.
"""
assert johnson_lindenstrauss_min_dim(100, eps=1e-5) == 368416070986


@pytest.mark.parametrize("random_projection_cls", all_RandomProjection)
def test_random_projection_feature_names_out(random_projection_cls):
    """Output feature names are the lowercased class name plus an index."""
    prefix = random_projection_cls.__name__.lower()

    transformer = random_projection_cls(n_components=2)
    transformer.fit(data)

    # One name per fitted output component, e.g. "<classname>0", "<classname>1".
    expected = [f"{prefix}{idx}" for idx in range(transformer.n_components_)]
    expected = np.array(expected, dtype=object)

    actual = transformer.get_feature_names_out()
    assert_array_equal(actual, expected)