Skip to content

Commit 36d056f

Browse files
authored
MNT Clean-up deprecations for 1.7: Remainder column type of ColumnTransformer (#31167)
1 parent 1527b1f commit 36d056f

File tree

3 files changed

+62
-208
lines changed

3 files changed

+62
-208
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
- The `force_int_remainder_cols` parameter of :class:`compose.ColumnTransformer` and
2+
:func:`compose.make_column_transformer` is deprecated and will be removed in 1.9.
3+
It has no effect.
4+
By :user:`Jérémie du Boisberranger <jeremiedbb>`

sklearn/compose/_column_transformer.py

+28-132
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# SPDX-License-Identifier: BSD-3-Clause
99

1010
import warnings
11-
from collections import Counter, UserList
11+
from collections import Counter
1212
from functools import partial
1313
from itertools import chain
1414
from numbers import Integral, Real
@@ -161,11 +161,8 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
161161
.. versionchanged:: 1.6
162162
`verbose_feature_names_out` can be a callable or a string to be formatted.
163163
164-
force_int_remainder_cols : bool, default=True
165-
Force the columns of the last entry of `transformers_`, which
166-
corresponds to the "remainder" transformer, to always be stored as
167-
indices (int) rather than column names (str). See description of the
168-
`transformers_` attribute for details.
164+
force_int_remainder_cols : bool, default=False
165+
This parameter has no effect.
169166
170167
.. note::
171168
If you do not access the list of columns for the remainder columns
@@ -178,6 +175,9 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
178175
The default value for `force_int_remainder_cols` will change from
179176
`True` to `False` in version 1.7.
180177
178+
.. deprecated:: 1.7
179+
`force_int_remainder_cols` is deprecated and will be removed in 1.9.
180+
181181
Attributes
182182
----------
183183
transformers_ : list
@@ -192,16 +192,12 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
192192
``len(transformers_)==len(transformers)+1``, otherwise
193193
``len(transformers_)==len(transformers)``.
194194
195-
.. versionchanged:: 1.5
196-
If there are remaining columns and `force_int_remainder_cols` is
197-
True, the remaining columns are always represented by their
198-
positional indices in the input `X` (as in older versions). If
199-
`force_int_remainder_cols` is False, the format attempts to match
200-
that of the other transformers: if all columns were provided as
201-
column names (`str`), the remaining columns are stored as column
202-
names; if all columns were provided as mask arrays (`bool`), so are
203-
the remaining columns; in all other cases the remaining columns are
204-
stored as indices (`int`).
195+
.. versionchanged:: 1.7
196+
The format of the remaining columns now attempts to match that of the other
197+
transformers: if all columns were provided as column names (`str`), the
198+
remaining columns are stored as column names; if all columns were provided
199+
as mask arrays (`bool`), so are the remaining columns; in all other cases
200+
the remaining columns are stored as indices (`int`).
205201
206202
named_transformers_ : :class:`~sklearn.utils.Bunch`
207203
Read-only attribute to access any transformer by given name.
@@ -300,7 +296,7 @@ class ColumnTransformer(TransformerMixin, _BaseComposition):
300296
"transformer_weights": [dict, None],
301297
"verbose": ["verbose"],
302298
"verbose_feature_names_out": ["boolean", str, callable],
303-
"force_int_remainder_cols": ["boolean"],
299+
"force_int_remainder_cols": ["boolean", Hidden(StrOptions({"deprecated"}))],
304300
}
305301

306302
def __init__(
@@ -313,7 +309,7 @@ def __init__(
313309
transformer_weights=None,
314310
verbose=False,
315311
verbose_feature_names_out=True,
316-
force_int_remainder_cols=True,
312+
force_int_remainder_cols="deprecated",
317313
):
318314
self.transformers = transformers
319315
self.remainder = remainder
@@ -477,13 +473,6 @@ def _iter(self, fitted, column_as_labels, skip_drop, skip_empty_columns):
477473
if self._remainder[2]:
478474
transformers = chain(transformers, [self._remainder])
479475

480-
# We want the warning about the future change of the remainder
481-
# columns dtype to be shown only when a user accesses them
482-
# directly, not when they are used by the ColumnTransformer itself.
483-
# We disable warnings here; they are enabled when setting
484-
# self.transformers_.
485-
transformers = _with_dtype_warning_enabled_set_to(False, transformers)
486-
487476
get_weight = (self.transformer_weights or {}).get
488477

489478
for name, trans, columns in transformers:
@@ -578,8 +567,6 @@ def _get_remainder_cols_dtype(self):
578567

579568
def _get_remainder_cols(self, indices):
580569
dtype = self._get_remainder_cols_dtype()
581-
if self.force_int_remainder_cols and dtype != "int":
582-
return _RemainderColsList(indices, future_dtype=dtype)
583570
if dtype == "str":
584571
return list(self.feature_names_in_[indices])
585572
if dtype == "bool":
@@ -753,7 +740,7 @@ def _update_fitted_transformers(self, transformers):
753740

754741
# sanity check that transformers is exhausted
755742
assert not list(fitted_transformers)
756-
self.transformers_ = _with_dtype_warning_enabled_set_to(True, transformers_)
743+
self.transformers_ = transformers_
757744

758745
def _validate_output(self, result):
759746
"""
@@ -984,6 +971,14 @@ def fit_transform(self, X, y=None, **params):
984971
_raise_for_params(params, self, "fit_transform")
985972
_check_feature_names(self, X, reset=True)
986973

974+
if self.force_int_remainder_cols != "deprecated":
975+
warnings.warn(
976+
"The parameter `force_int_remainder_cols` is deprecated and will be "
977+
"removed in 1.9. It has no effect. Leave it to its default value to "
978+
"avoid this warning.",
979+
FutureWarning,
980+
)
981+
987982
X = _check_X(X)
988983
# set n_features_in_ attribute
989984
_check_n_features(self, X, reset=True)
@@ -1380,7 +1375,7 @@ def make_column_transformer(
13801375
n_jobs=None,
13811376
verbose=False,
13821377
verbose_feature_names_out=True,
1383-
force_int_remainder_cols=True,
1378+
force_int_remainder_cols="deprecated",
13841379
):
13851380
"""Construct a ColumnTransformer from the given transformers.
13861381
@@ -1454,10 +1449,7 @@ def make_column_transformer(
14541449
.. versionadded:: 1.0
14551450
14561451
force_int_remainder_cols : bool, default=False
1457-
Force the columns of the last entry of `transformers_`, which
1458-
corresponds to the "remainder" transformer, to always be stored as
1459-
indices (int) rather than column names (str). See description of the
1460-
:attr:`ColumnTransformer.transformers_` attribute for details.
1452+
This parameter has no effect.
14611453
14621454
.. note::
14631455
If you do not access the list of columns for the remainder columns
@@ -1470,6 +1462,9 @@ def make_column_transformer(
14701462
The default value for `force_int_remainder_cols` will change from
14711463
`True` to `False` in version 1.7.
14721464
1465+
.. deprecated:: 1.7
1466+
`force_int_remainder_cols` is deprecated and will be removed in version 1.9.
1467+
14731468
Returns
14741469
-------
14751470
ct : ColumnTransformer
@@ -1596,105 +1591,6 @@ def __call__(self, df):
15961591
return cols.tolist()
15971592

15981593

1599-
class _RemainderColsList(UserList):
1600-
"""A list that raises a warning whenever items are accessed.
1601-
1602-
It is used to store the columns handled by the "remainder" entry of
1603-
``ColumnTransformer.transformers_``, i.e. ``transformers_[-1][-1]``.
1604-
1605-
For some values of the ``ColumnTransformer`` ``transformers`` parameter,
1606-
this list of indices will be replaced by either a list of column names or a
1607-
boolean mask; in those cases we emit a ``FutureWarning`` the first time an
1608-
element is accessed.
1609-
1610-
Parameters
1611-
----------
1612-
columns : list of int
1613-
The remainder columns.
1614-
1615-
future_dtype : {'str', 'bool'}, default=None
1616-
The dtype that will be used by a ColumnTransformer with the same inputs
1617-
in a future release. There is a default value because providing a
1618-
constructor that takes a single argument is a requirement for
1619-
subclasses of UserList, but we do not use it in practice. It would only
1620-
be used if a user called methods that return a new list such as
1621-
copying or concatenating `_RemainderColsList`.
1622-
1623-
warning_was_emitted : bool, default=False
1624-
Whether the warning for that particular list was already shown, so we
1625-
only emit it once.
1626-
1627-
warning_enabled : bool, default=True
1628-
When False, the list never emits the warning nor updates
1629-
`warning_was_emitted`. This is used to obtain a quiet copy of the list
1630-
for use by the `ColumnTransformer` itself, so that the warning is only
1631-
shown when a user accesses it directly.
1632-
"""
1633-
1634-
def __init__(
1635-
self,
1636-
columns,
1637-
*,
1638-
future_dtype=None,
1639-
warning_was_emitted=False,
1640-
warning_enabled=True,
1641-
):
1642-
super().__init__(columns)
1643-
self.future_dtype = future_dtype
1644-
self.warning_was_emitted = warning_was_emitted
1645-
self.warning_enabled = warning_enabled
1646-
1647-
def __getitem__(self, index):
1648-
self._show_remainder_cols_warning()
1649-
return super().__getitem__(index)
1650-
1651-
def _show_remainder_cols_warning(self):
1652-
if self.warning_was_emitted or not self.warning_enabled:
1653-
return
1654-
self.warning_was_emitted = True
1655-
future_dtype_description = {
1656-
"str": "column names (of type str)",
1657-
"bool": "a mask array (of type bool)",
1658-
# shouldn't happen because we always initialize it with a
1659-
# non-default future_dtype
1660-
None: "a different type depending on the ColumnTransformer inputs",
1661-
}.get(self.future_dtype, self.future_dtype)
1662-
1663-
# TODO(1.7) Update the warning to say that the old behavior will be
1664-
# removed in 1.9.
1665-
warnings.warn(
1666-
(
1667-
"\nThe format of the columns of the 'remainder' transformer in"
1668-
" ColumnTransformer.transformers_ will change in version 1.7 to"
1669-
" match the format of the other transformers.\nAt the moment the"
1670-
" remainder columns are stored as indices (of type int). With the same"
1671-
" ColumnTransformer configuration, in the future they will be stored"
1672-
f" as {future_dtype_description}.\nTo use the new behavior now and"
1673-
" suppress this warning, use"
1674-
" ColumnTransformer(force_int_remainder_cols=False).\n"
1675-
),
1676-
category=FutureWarning,
1677-
)
1678-
1679-
def _repr_pretty_(self, printer, *_):
1680-
"""Override display in ipython console, otherwise the class name is shown."""
1681-
printer.text(repr(self.data))
1682-
1683-
1684-
def _with_dtype_warning_enabled_set_to(warning_enabled, transformers):
1685-
result = []
1686-
for name, trans, columns in transformers:
1687-
if isinstance(columns, _RemainderColsList):
1688-
columns = _RemainderColsList(
1689-
columns.data,
1690-
future_dtype=columns.future_dtype,
1691-
warning_was_emitted=columns.warning_was_emitted,
1692-
warning_enabled=warning_enabled,
1693-
)
1694-
result.append((name, trans, columns))
1695-
return result
1696-
1697-
16981594
def _feature_names_out_with_str_format(
16991595
transformer_name: str, feature_name: str, str_format: str
17001596
) -> str:

0 commit comments

Comments
 (0)