ENH add support for 'passthrough' in FeatureUnion #20860

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 22 commits, Sep 14, 2021
6 changes: 6 additions & 0 deletions doc/whats_new/v1.1.rst
@@ -53,6 +53,12 @@ Changelog
:pr:`20880` by :user:`Guillaume Lemaitre <glemaitre>`
and :user:`András Simon <simonandras>`.

:mod:`sklearn.pipeline`
.......................

- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`.
Setting a transformer to "passthrough" will pass the features unchanged.
:pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.

Code and Documentation Contributors
-----------------------------------
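
Before moving to the code changes, a quick usage sketch of what the new changelog entry enables. This is an editor's illustration, not part of the diff; it assumes scikit-learn 1.1 or later, and the component names "original" and "pca" are arbitrary.

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.decomposition import PCA
    from sklearn.pipeline import FeatureUnion

    X = load_iris().data  # shape (150, 4)

    # "passthrough" keeps the original features next to the PCA components.
    union = FeatureUnion([("original", "passthrough"), ("pca", PCA(n_components=2))])
    Xt = union.fit_transform(X)

    assert Xt.shape == (150, 4 + 2)
    assert np.array_equal(Xt[:, :4], X)  # the passthrough block is X unchanged
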
35 changes: 21 additions & 14 deletions sklearn/pipeline.py
@@ -17,6 +17,7 @@
from joblib import Parallel

from .base import clone, TransformerMixin
+ from .preprocessing import FunctionTransformer
from .utils._estimator_html_repr import _VisualBlock
from .utils.metaestimators import available_if
from .utils import (
@@ -853,21 +854,24 @@ class FeatureUnion(TransformerMixin, _BaseComposition):

Parameters of the transformers may be set using its name and the parameter
name separated by a '__'. A transformer may be replaced entirely by
- setting the parameter with its name to another transformer,
- or removed by setting to 'drop'.
+ setting the parameter with its name to another transformer, removed by
+ setting to 'drop' or disabled by setting to 'passthrough' (features are
+ passed without transformation).

Read more in the :ref:`User Guide <feature_union>`.

.. versionadded:: 0.13

Parameters
----------
- transformer_list : list of tuple
-     List of tuple containing `(str, transformer)`. The first element
-     of the tuple is name affected to the transformer while the
-     second element is a scikit-learn transformer instance.
-     The transformer instance can also be `"drop"` for it to be
-     ignored.
+ transformer_list : list of (str, transformer) tuples
+     List of transformer objects to be applied to the data. The first
+     half of each tuple is the name of the transformer. The transformer can
+     be 'drop' for it to be ignored or can be 'passthrough' for features to
+     be passed unchanged.
+
+     .. versionadded:: 1.1
+         Added the option `"passthrough"`.

.. versionchanged:: 0.22
Deprecated `None` as a transformer in favor of 'drop'.
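
To make the set_params mechanics described in the docstring above concrete, here is a small sketch. It is an editor's illustration, not part of the diff; the transformers chosen (PCA, StandardScaler) and their names are arbitrary.

    from sklearn.decomposition import PCA
    from sklearn.pipeline import FeatureUnion
    from sklearn.preprocessing import StandardScaler

    union = FeatureUnion([("pca", PCA(n_components=2)), ("scale", StandardScaler())])

    # Disable a branch: its input features are forwarded unchanged.
    union.set_params(pca="passthrough")

    # Remove a branch entirely.
    union.set_params(scale="drop")

    # Or swap a branch for another transformer.
    union.set_params(pca=PCA(n_components=1))
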
@@ -977,7 +981,7 @@ def _validate_transformers(self):

# validate estimators
for t in transformers:
- if t == "drop":
+ if t in ("drop", "passthrough"):
continue
if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(
t, "transform"
@@ -1004,12 +1008,15 @@ def _iter(self):
Generate (name, trans, weight) tuples excluding None and
'drop' transformers.
"""

get_weight = (self.transformer_weights or {}).get
- return (
-     (name, trans, get_weight(name))
-     for name, trans in self.transformer_list
-     if trans != "drop"
- )

+ for name, trans in self.transformer_list:
+     if trans == "drop":
+         continue
+     if trans == "passthrough":
+         trans = FunctionTransformer()
+     yield (name, trans, get_weight(name))

@deprecated(
"get_feature_names is deprecated in 1.0 and will be removed "
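
In the `_iter` change above, a "passthrough" entry is swapped for a default `FunctionTransformer`, which with `func=None` is simply the identity mapping, so the union concatenates the untouched input alongside the other transformers' outputs. A quick sketch of that equivalence (editor's illustration):

    import numpy as np
    from sklearn.preprocessing import FunctionTransformer

    X = np.array([[1.0, 2.0], [3.0, 4.0]])

    # With no `func`, FunctionTransformer is the identity: transform(X) is X.
    identity = FunctionTransformer()
    assert np.array_equal(identity.fit_transform(X), X)
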
54 changes: 54 additions & 0 deletions sklearn/tests/test_pipeline.py
@@ -1004,6 +1004,60 @@ def test_set_feature_union_step_drop(get_names):
assert not record


def test_set_feature_union_passthrough():
"""Check the behaviour of setting a transformer to `"passthrough"`."""
mult2 = Mult(2)
mult3 = Mult(3)
X = np.asarray([[1]])

ft = FeatureUnion([("m2", mult2), ("m3", mult3)])
assert_array_equal([[2, 3]], ft.fit(X).transform(X))
assert_array_equal([[2, 3]], ft.fit_transform(X))

ft.set_params(m2="passthrough")
assert_array_equal([[1, 3]], ft.fit(X).transform(X))
assert_array_equal([[1, 3]], ft.fit_transform(X))

ft.set_params(m3="passthrough")
assert_array_equal([[1, 1]], ft.fit(X).transform(X))
assert_array_equal([[1, 1]], ft.fit_transform(X))

# check we can change back
ft.set_params(m3=mult3)
assert_array_equal([[1, 3]], ft.fit(X).transform(X))
assert_array_equal([[1, 3]], ft.fit_transform(X))

# Check 'passthrough' step at construction time
ft = FeatureUnion([("m2", "passthrough"), ("m3", mult3)])
assert_array_equal([[1, 3]], ft.fit(X).transform(X))
assert_array_equal([[1, 3]], ft.fit_transform(X))

X = iris.data
columns = X.shape[1]
pca = PCA(n_components=2, svd_solver="randomized", random_state=0)

ft = FeatureUnion([("passthrough", "passthrough"), ("pca", pca)])
assert_array_equal(X, ft.fit(X).transform(X)[:, :columns])
assert_array_equal(X, ft.fit_transform(X)[:, :columns])

ft.set_params(pca="passthrough")
X_ft = ft.fit(X).transform(X)
assert_array_equal(X_ft, np.hstack([X, X]))
X_ft = ft.fit_transform(X)
assert_array_equal(X_ft, np.hstack([X, X]))

ft.set_params(passthrough=pca)
assert_array_equal(X, ft.fit(X).transform(X)[:, -columns:])
assert_array_equal(X, ft.fit_transform(X)[:, -columns:])

ft = FeatureUnion(
[("passthrough", "passthrough"), ("pca", pca)],
transformer_weights={"passthrough": 2},
)
assert_array_equal(X * 2, ft.fit(X).transform(X)[:, :columns])
assert_array_equal(X * 2, ft.fit_transform(X)[:, :columns])
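
The last block of the test above also pins down how `transformer_weights` interact with a "passthrough" entry: the weight multiplies the passed-through features just like any other transformer's output. A condensed sketch mirroring the test (editor's illustration; the PCA branch and the data are arbitrary):

    import numpy as np
    from sklearn.decomposition import PCA
    from sklearn.pipeline import FeatureUnion

    X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

    union = FeatureUnion(
        [("passthrough", "passthrough"), ("pca", PCA(n_components=1))],
        transformer_weights={"passthrough": 2},
    )
    Xt = union.fit_transform(X)

    # The first block is the input scaled by the weight.
    assert np.allclose(Xt[:, :2], 2 * X)
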


def test_step_name_validation():
error_message_1 = r"Estimator names must not contain __: got \['a__q'\]"
error_message_2 = r"Names provided are not unique: \['a', 'a'\]"