scikit-learn · jnothman · Jan 15, 2019 · Jan 4, 2019 · Jan 11, 2019 · Jan 11, 2019
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
@@ -2,6 +2,27 @@
 
 .. currentmodule:: sklearn
 
+.. _changes_0_20_3:
+
+Version 0.20.3
+==============
+
+**??, 2019**
+
+This is a bug-fix release with some minor documentation improvements and
+enhancements to features released in 0.20.0.
+
+Changelog
+---------
+
+:mod:`sklearn.preprocessing`
+............................
+
+- |Fix| Fixed a bug in :class:`preprocessing.OneHotEncoder` where the
+  deprecation of ``categorical_features`` was handled incorrectly in
+  combination with ``handle_unknown='ignore'``.
+  :issue:`12881` by `Joris Van den Bossche`_.
+
 .. _changes_0_20_2:
 
 Version 0.20.2

diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py
@@ -331,6 +331,9 @@ def _handle_deprecations(self, X):
             self._legacy_mode = True
 
         else:  # n_values = 'auto'
+            # n_values can also be None (default to catch usage), so set
+            # _n_values to 'auto' explicitly
+            self._n_values = 'auto'
             if self.handle_unknown == 'ignore':
                 # no change in behaviour, no need to raise deprecation warning
                 self._legacy_mode = False
@@ -366,7 +369,6 @@ def _handle_deprecations(self, X):
                     )
                     warnings.warn(msg, FutureWarning)
                     self._legacy_mode = True
-                    self._n_values = 'auto'
 
         # if user specified categorical_features -> always use legacy mode
         if self.categorical_features is not None:
@@ -452,7 +454,7 @@ def _legacy_fit_transform(self, X):
             except (ValueError, TypeError):
                 raise TypeError("Wrong type for parameter `n_values`. Expected"
                                 " 'auto', int or array of ints, got %r"
-                                % type(X))
+                                % type(self._n_values))
             if n_values.ndim < 1 or n_values.shape[0] != X.shape[1]:
                 raise ValueError("Shape mismatch: if n_values is an array,"
                                  " it has to be of shape (n_features,).")

diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py
@@ -226,6 +226,18 @@ def test_one_hot_encoder_categorical_features():
     assert_raises(ValueError, oh.fit, X)
 
 
+def test_one_hot_encoder_categorical_features_ignore_unknown():
+    # GH12881 bug in combination of categorical_features with ignore
+    X = np.array([[1, 2, 3], [4, 5, 6], [2, 3, 2]]).T
+    oh = OneHotEncoder(categorical_features=[2], handle_unknown='ignore')
+
+    with ignore_warnings(category=DeprecationWarning):
+        res = oh.fit_transform(X)
+
+    expected = np.array([[1, 0, 1], [0, 1, 0], [1, 2, 3], [4, 5, 6]]).T
+    assert_array_equal(res.toarray(), expected)
+
+
 def test_one_hot_encoder_handle_unknown():
     X = np.array([[0, 2, 1], [1, 0, 3], [1, 0, 2]])
     X2 = np.array([[4, 1, 1]])