Skip to content

MNT Clean-up deprecations for 1.7: Remainder column type of ColumnTransformer #31167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats_new/upcoming_changes/sklearn.compose/31167.api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The `force_int_remainder_cols` parameter of :class:`compose.ColumnTransformer` and
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this meant to be in bullet form?

:func:`compose.make_column_transformer` is deprecated and will be removed in 1.9.
It has no effect.
By :user:`Jérémie du Boisberranger <jeremiedbb>`
160 changes: 28 additions & 132 deletions sklearn/compose/_column_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# SPDX-License-Identifier: BSD-3-Clause

import warnings
from collections import Counter, UserList
from collections import Counter
from functools import partial
from itertools import chain
from numbers import Integral, Real
Expand Down Expand Up @@ -161,11 +161,8 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
.. versionchanged:: 1.6
`verbose_feature_names_out` can be a callable or a string to be formatted.

force_int_remainder_cols : bool, default=True
Force the columns of the last entry of `transformers_`, which
corresponds to the "remainder" transformer, to always be stored as
indices (int) rather than column names (str). See description of the
`transformers_` attribute for details.
force_int_remainder_cols : bool, default=False
This parameter has no effect.

.. note::
If you do not access the list of columns for the remainder columns
Expand All @@ -178,6 +175,9 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
The default value for `force_int_remainder_cols` will change from
`True` to `False` in version 1.7.

.. deprecated:: 1.7
`force_int_remainder_cols` is deprecated and will be removed in 1.9.

Attributes
----------
transformers_ : list
Expand All @@ -192,16 +192,12 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
``len(transformers_)==len(transformers)+1``, otherwise
``len(transformers_)==len(transformers)``.

.. versionchanged:: 1.5
If there are remaining columns and `force_int_remainder_cols` is
True, the remaining columns are always represented by their
positional indices in the input `X` (as in older versions). If
`force_int_remainder_cols` is False, the format attempts to match
that of the other transformers: if all columns were provided as
column names (`str`), the remaining columns are stored as column
names; if all columns were provided as mask arrays (`bool`), so are
the remaining columns; in all other cases the remaining columns are
stored as indices (`int`).
.. versionchanged:: 1.7
The format of the remaining columns now attempts to match that of the other
transformers: if all columns were provided as column names (`str`), the
remaining columns are stored as column names; if all columns were provided
as mask arrays (`bool`), so are the remaining columns; in all other cases
the remaining columns are stored as indices (`int`).

named_transformers_ : :class:`~sklearn.utils.Bunch`
Read-only attribute to access any transformer by given name.
Expand Down Expand Up @@ -300,7 +296,7 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
"transformer_weights": [dict, None],
"verbose": ["verbose"],
"verbose_feature_names_out": ["boolean", str, callable],
"force_int_remainder_cols": ["boolean"],
"force_int_remainder_cols": ["boolean", Hidden(StrOptions({"deprecated"}))],
}

def __init__(
Expand All @@ -313,7 +309,7 @@ def __init__(
transformer_weights=None,
verbose=False,
verbose_feature_names_out=True,
force_int_remainder_cols=True,
force_int_remainder_cols="deprecated",
):
self.transformers = transformers
self.remainder = remainder
Expand Down Expand Up @@ -477,13 +473,6 @@ def _iter(self, fitted, column_as_labels, skip_drop, skip_empty_columns):
if self._remainder[2]:
transformers = chain(transformers, [self._remainder])

# We want the warning about the future change of the remainder
# columns dtype to be shown only when a user accesses them
# directly, not when they are used by the ColumnTransformer itself.
# We disable warnings here; they are enabled when setting
# self.transformers_.
transformers = _with_dtype_warning_enabled_set_to(False, transformers)

get_weight = (self.transformer_weights or {}).get

for name, trans, columns in transformers:
Expand Down Expand Up @@ -578,8 +567,6 @@ def _get_remainder_cols_dtype(self):

def _get_remainder_cols(self, indices):
dtype = self._get_remainder_cols_dtype()
if self.force_int_remainder_cols and dtype != "int":
return _RemainderColsList(indices, future_dtype=dtype)
if dtype == "str":
return list(self.feature_names_in_[indices])
if dtype == "bool":
Expand Down Expand Up @@ -753,7 +740,7 @@ def _update_fitted_transformers(self, transformers):

# sanity check that transformers is exhausted
assert not list(fitted_transformers)
self.transformers_ = _with_dtype_warning_enabled_set_to(True, transformers_)
self.transformers_ = transformers_

def _validate_output(self, result):
"""
Expand Down Expand Up @@ -984,6 +971,14 @@ def fit_transform(self, X, y=None, **params):
_raise_for_params(params, self, "fit_transform")
_check_feature_names(self, X, reset=True)

if self.force_int_remainder_cols != "deprecated":
warnings.warn(
"The parameter `force_int_remainder_cols` is deprecated and will be "
"removed in 1.9. It has no effect. Leave it to its default value to "
"avoid this warning.",
FutureWarning,
)

X = _check_X(X)
# set n_features_in_ attribute
_check_n_features(self, X, reset=True)
Expand Down Expand Up @@ -1380,7 +1375,7 @@ def make_column_transformer(
n_jobs=None,
verbose=False,
verbose_feature_names_out=True,
force_int_remainder_cols=True,
force_int_remainder_cols="deprecated",
):
"""Construct a ColumnTransformer from the given transformers.

Expand Down Expand Up @@ -1454,10 +1449,7 @@ def make_column_transformer(
.. versionadded:: 1.0

force_int_remainder_cols : bool, default=False
Force the columns of the last entry of `transformers_`, which
corresponds to the "remainder" transformer, to always be stored as
indices (int) rather than column names (str). See description of the
:attr:`ColumnTransformer.transformers_` attribute for details.
This parameter has no effect.

.. note::
If you do not access the list of columns for the remainder columns
Expand All @@ -1470,6 +1462,9 @@ def make_column_transformer(
The default value for `force_int_remainder_cols` will change from
`True` to `False` in version 1.7.

.. deprecated:: 1.7
`force_int_remainder_cols` is deprecated and will be removed in version 1.9.

Returns
-------
ct : ColumnTransformer
Expand Down Expand Up @@ -1596,105 +1591,6 @@ def __call__(self, df):
return cols.tolist()


class _RemainderColsList(UserList):
    """List of "remainder" columns that warns the first time an item is read.

    Instances hold the columns handled by the "remainder" entry of
    ``ColumnTransformer.transformers_``, i.e. ``transformers_[-1][-1]``.

    For some values of the ``ColumnTransformer`` ``transformers`` parameter,
    this list of indices will be replaced by either a list of column names or
    a boolean mask; in those cases a ``FutureWarning`` is emitted the first
    time an element is accessed.

    Parameters
    ----------
    columns : list of int
        The remainder columns.

    future_dtype : {'str', 'bool'}, default=None
        The dtype that a ColumnTransformer with the same inputs will use in a
        future release. A default is provided only because subclasses of
        ``UserList`` must support construction from a single argument; it is
        not relied upon in practice. It would only come into play if a user
        called methods that return a new list, such as copying or
        concatenating a `_RemainderColsList`.

    warning_was_emitted : bool, default=False
        Whether the warning for this particular list was already shown, so
        that it is emitted only once.

    warning_enabled : bool, default=True
        When False, the list never emits the warning nor updates
        `warning_was_emitted`. This is used to obtain a quiet copy of the
        list for use by the `ColumnTransformer` itself, so that the warning
        is only shown when a user accesses the list directly.
    """

    def __init__(
        self,
        columns,
        *,
        future_dtype=None,
        warning_was_emitted=False,
        warning_enabled=True,
    ):
        super().__init__(columns)
        self.warning_enabled = warning_enabled
        self.warning_was_emitted = warning_was_emitted
        self.future_dtype = future_dtype

    def __getitem__(self, index):
        """Return the requested item(s), warning first if still required."""
        self._show_remainder_cols_warning()
        return super().__getitem__(index)

    def _show_remainder_cols_warning(self):
        """Emit the format-change ``FutureWarning`` at most once per list."""
        if not self.warning_enabled or self.warning_was_emitted:
            return
        # Flip the flag before doing anything else so the warning can never
        # be emitted twice for the same list.
        self.warning_was_emitted = True

        dtype = self.future_dtype
        descriptions = {
            "str": "column names (of type str)",
            "bool": "a mask array (of type bool)",
            # shouldn't happen because the list is always initialized with a
            # non-default future_dtype
            None: "a different type depending on the ColumnTransformer inputs",
        }
        # Unknown dtypes are shown verbatim in the message.
        future_format = descriptions.get(dtype, dtype)

        # TODO(1.7) Update the warning to say that the old behavior will be
        # removed in 1.9.
        message = (
            "\nThe format of the columns of the 'remainder' transformer in "
            "ColumnTransformer.transformers_ will change in version 1.7 to "
            "match the format of the other transformers.\n"
            "At the moment the remainder columns are stored as indices (of "
            "type int). With the same ColumnTransformer configuration, in "
            f"the future they will be stored as {future_format}.\n"
            "To use the new behavior now and suppress this warning, use "
            "ColumnTransformer(force_int_remainder_cols=False).\n"
        )
        warnings.warn(message, category=FutureWarning)

    def _repr_pretty_(self, printer, *_):
        """Override display in ipython console, otherwise the class name is shown."""
        plain_repr = repr(self.data)
        printer.text(plain_repr)


def _with_dtype_warning_enabled_set_to(warning_enabled, transformers):
    """Return the transformers with the remainder-columns warning toggled.

    Parameters
    ----------
    warning_enabled : bool
        Value given to ``warning_enabled`` on every rebuilt
        `_RemainderColsList`.

    transformers : iterable of (name, transformer, columns) tuples
        The entries to copy. It is consumed exactly once.

    Returns
    -------
    result : list of (name, transformer, columns) tuples
        A new list in the same order. Any `columns` that is a
        `_RemainderColsList` is replaced by a fresh instance carrying the
        same data, `future_dtype` and `warning_was_emitted`; all other
        `columns` objects are passed through unchanged.
    """

    def _retoggle(cols):
        # Only the special remainder list carries the warning switch.
        if not isinstance(cols, _RemainderColsList):
            return cols
        return _RemainderColsList(
            cols.data,
            future_dtype=cols.future_dtype,
            warning_was_emitted=cols.warning_was_emitted,
            warning_enabled=warning_enabled,
        )

    return [
        (name, trans, _retoggle(columns)) for name, trans, columns in transformers
    ]


def _feature_names_out_with_str_format(
transformer_name: str, feature_name: str, str_format: str
) -> str:
Expand Down
Loading