Skip to content

Commit 45ab4c6

Browse files
jamesmyatt authored and jnothman committed
[MRG] Fixes get_feature_names results when using drop functionality (scikit-learn#13894)
1 parent b1a531a commit 45ab4c6

File tree

3 files changed

+28
-4
lines changed

3 files changed

+28
-4
lines changed

doc/whats_new/v0.21.rst

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,17 @@ Changelog
2121
:mod:`sklearn.metrics`
2222
......................
2323

24-
- |Fix| Fixed a bug in :func:`euclidean_distances` where a part of the distance
25-
matrix was left un-instantiated for sufficiently large float32 datasets
26-
(regression introduced in 0.21) :issue:`13910`
27-
by :user:`Jérémie du Boisberranger <jeremiedbb>`.
24+
- |Fix| Fixed a bug in :func:`metrics.euclidean_distances` where a part of the
25+
distance matrix was left un-instantiated for sufficiently large float32
26+
datasets (regression introduced in 0.21). :pr:`13910` by :user:`Jérémie du
27+
Boisberranger <jeremiedbb>`.
28+
29+
:mod:`sklearn.preprocessing`
30+
............................
31+
32+
- |Fix| Fixed a bug in :class:`preprocessing.OneHotEncoder` where the new
33+
`drop` parameter was not reflected in `get_feature_names`. :pr:`13894`
34+
by :user:`James Myatt <jamesmyatt>`.
2835

2936
.. _changes_0_21_1:
3037

sklearn/preprocessing/_encoders.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,8 @@ def get_feature_names(self, input_features=None):
848848
for i in range(len(cats)):
849849
names = [
850850
input_features[i] + '_' + str(t) for t in cats[i]]
851+
if self.drop is not None:
852+
names.pop(self.drop_idx_[i])
851853
feature_names.extend(names)
852854

853855
return np.array(feature_names, dtype=object)

sklearn/preprocessing/tests/test_encoders.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,21 @@ def test_one_hot_encoder_feature_names_unicode():
590590
assert_array_equal(['n👍me_c❤t1', 'n👍me_dat2'], feature_names)
591591

592592

593+
@pytest.mark.parametrize("drop, expected_names",
594+
[('first', ['x0_c', 'x2_b']),
595+
(['c', 2, 'b'], ['x0_b', 'x2_a'])],
596+
ids=['first', 'manual'])
597+
def test_one_hot_encoder_feature_names_drop(drop, expected_names):
598+
X = [['c', 2, 'a'],
599+
['b', 2, 'b']]
600+
601+
ohe = OneHotEncoder(drop=drop)
602+
ohe.fit(X)
603+
feature_names = ohe.get_feature_names()
604+
assert isinstance(feature_names, np.ndarray)
605+
assert_array_equal(expected_names, feature_names)
606+
607+
593608
@pytest.mark.parametrize("X", [np.array([[1, np.nan]]).T,
594609
np.array([['a', np.nan]], dtype=object).T],
595610
ids=['numeric', 'object'])

0 commit comments

Comments
 (0)