Skip to content

Commit 277e125

Browse files
jorisvandenbossche authored and GaelVaroquaux committed
Change default of ColumnTransformer remainder from passthrough to drop (#11603)
1 parent 9111064 commit 277e125

File tree

3 files changed

+30
-28
lines changed

3 files changed

+30
-28
lines changed

doc/modules/compose.rst

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -413,7 +413,7 @@ variable, but apply a :class:`feature_extraction.text.CountVectorizer
413413
<sklearn.feature_extraction.text.CountVectorizer>` to the ``'title'`` column.
414414
As we might use multiple feature extraction methods on the same column, we give
415415
each transformer a unique name, say ``'city_category'`` and ``'title_bow'``.
416-
We can ignore the remaining rating columns by setting ``remainder='drop'``::
416+
By default, the remaining rating columns are ignored (``remainder='drop'``)::
417417

418418
>>> from sklearn.compose import ColumnTransformer
419419
>>> from sklearn.feature_extraction.text import CountVectorizer
@@ -495,7 +495,7 @@ above example would be::
495495
... ('city', CountVectorizer(analyzer=lambda x: [x])),
496496
... ('title', CountVectorizer()))
497497
>>> column_trans # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
498-
ColumnTransformer(n_jobs=1, remainder='passthrough', transformer_weights=None,
498+
ColumnTransformer(n_jobs=1, remainder='drop', transformer_weights=None,
499499
transformers=[('countvectorizer-1', ...)
500500

501501
.. topic:: Examples:

sklearn/compose/_column_transformer.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -71,14 +71,14 @@ class ColumnTransformer(_BaseComposition, TransformerMixin):
7171
A callable is passed the input data `X` and can return any of the
7272
above.
7373
74-
remainder : {'passthrough', 'drop'} or estimator, default 'passthrough'
75-
By default, all remaining columns that were not specified in
76-
`transformers` will be automatically passed through (default of
77-
``'passthrough'``). This subset of columns is concatenated with the
78-
output of the transformers.
79-
By using ``remainder='drop'``, only the specified columns in
80-
`transformers` are transformed and combined in the output, and the
81-
non-specified columns are dropped.
74+
remainder : {'passthrough', 'drop'} or estimator, default 'drop'
75+
By default, only the specified columns in `transformers` are
76+
transformed and combined in the output, and the non-specified
77+
columns are dropped (default of ``'drop'``).
78+
By specifying ``remainder='passthrough'``, all remaining columns that
79+
were not specified in `transformers` will be automatically passed
80+
through. This subset of columns is concatenated with the output of
81+
the transformers.
8282
By setting ``remainder`` to be an estimator, the remaining
8383
non-specified columns will use the ``remainder`` estimator. The
8484
estimator must support `fit` and `transform`.
@@ -141,7 +141,7 @@ class ColumnTransformer(_BaseComposition, TransformerMixin):
141141
142142
"""
143143

144-
def __init__(self, transformers, remainder='passthrough', n_jobs=1,
144+
def __init__(self, transformers, remainder='drop', n_jobs=1,
145145
transformer_weights=None):
146146
self.transformers = transformers
147147
self.remainder = remainder
@@ -658,8 +658,7 @@ def make_column_transformer(*transformers, **kwargs):
658658
... (['numerical_column'], StandardScaler()),
659659
... (['categorical_column'], OneHotEncoder()))
660660
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
661-
ColumnTransformer(n_jobs=1, remainder='passthrough',
662-
transformer_weights=None,
661+
ColumnTransformer(n_jobs=1, remainder='drop', transformer_weights=None,
663662
transformers=[('standardscaler',
664663
StandardScaler(...),
665664
['numerical_column']),
@@ -669,7 +668,7 @@ def make_column_transformer(*transformers, **kwargs):
669668
670669
"""
671670
n_jobs = kwargs.pop('n_jobs', 1)
672-
remainder = kwargs.pop('remainder', 'passthrough')
671+
remainder = kwargs.pop('remainder', 'drop')
673672
if kwargs:
674673
raise TypeError('Unknown keyword arguments: "{}"'
675674
.format(list(kwargs.keys())[0]))

sklearn/compose/tests/test_column_transformer.py

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,7 @@ def test_column_transformer_get_set_params():
405405
('trans2', StandardScaler(), [1])])
406406

407407
exp = {'n_jobs': 1,
408-
'remainder': 'passthrough',
408+
'remainder': 'drop',
409409
'trans1': ct.transformers[0][1],
410410
'trans1__copy': True,
411411
'trans1__with_mean': True,
@@ -424,7 +424,7 @@ def test_column_transformer_get_set_params():
424424

425425
ct.set_params(trans1='passthrough')
426426
exp = {'n_jobs': 1,
427-
'remainder': 'passthrough',
427+
'remainder': 'drop',
428428
'trans1': 'passthrough',
429429
'trans2': ct.transformers[1][1],
430430
'trans2__copy': True,
@@ -492,7 +492,8 @@ def test_column_transformer_get_feature_names():
492492
NotImplementedError, 'get_feature_names is not yet supported',
493493
ct.get_feature_names)
494494

495-
ct = ColumnTransformer([('trans', DictVectorizer(), 0)])
495+
ct = ColumnTransformer([('trans', DictVectorizer(), 0)],
496+
remainder='passthrough')
496497
ct.fit(X)
497498
assert_raise_message(
498499
NotImplementedError, 'get_feature_names is not yet supported',
@@ -552,23 +553,22 @@ def test_column_transformer_remainder():
552553
X_res_second = np.array([2, 4, 6]).reshape(-1, 1)
553554
X_res_both = X_array
554555

555-
# default passthrough
556-
ct = ColumnTransformer([('trans', Trans(), [0])])
557-
assert_array_equal(ct.fit_transform(X_array), X_res_both)
558-
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both)
556+
# default drop
557+
ct = ColumnTransformer([('trans1', Trans(), [0])])
558+
assert_array_equal(ct.fit_transform(X_array), X_res_first)
559+
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_first)
559560
assert len(ct.transformers_) == 2
560561
assert ct.transformers_[-1][0] == 'remainder'
561-
assert ct.transformers_[-1][1] == 'passthrough'
562+
assert ct.transformers_[-1][1] == 'drop'
562563
assert_array_equal(ct.transformers_[-1][2], [1])
563564

564-
# specify to drop remaining columns
565-
ct = ColumnTransformer([('trans1', Trans(), [0])],
566-
remainder='drop')
567-
assert_array_equal(ct.fit_transform(X_array), X_res_first)
568-
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_first)
565+
# specify passthrough
566+
ct = ColumnTransformer([('trans', Trans(), [0])], remainder='passthrough')
567+
assert_array_equal(ct.fit_transform(X_array), X_res_both)
568+
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both)
569569
assert len(ct.transformers_) == 2
570570
assert ct.transformers_[-1][0] == 'remainder'
571-
assert ct.transformers_[-1][1] == 'drop'
571+
assert ct.transformers_[-1][1] == 'passthrough'
572572
assert_array_equal(ct.transformers_[-1][2], [1])
573573

574574
# column order is not preserved (passed through added to end)
@@ -602,6 +602,9 @@ def test_column_transformer_remainder():
602602
"remainder keyword needs to be one of \'drop\', \'passthrough\', "
603603
"or estimator.", ct.fit_transform, X_array)
604604

605+
# check default for make_column_transformer
606+
ct = make_column_transformer(([0], Trans()))
607+
assert ct.remainder == 'drop'
605608

606609
@pytest.mark.parametrize("key", [[0], np.array([0]), slice(0, 1),
607610
np.array([True, False])])

0 commit comments

Comments
 (0)