diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index e2840272bff5d..0ce4100506418 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -39,6 +39,13 @@ Changelog function to return 0 when two all-zero vectors are compared. :issue:`12685` by :user:`Thomas Fan <thomasjpfan>`. +:mod:`sklearn.utils` +.................... + +- |Fix| Calling :func:`utils.check_array` on `pandas.Series` with categorical + data, which raised an error in 0.20.0, now returns the expected output again. + :issue:`12699` by `Joris Van den Bossche`_. + .. _changes_0_20_1: Version 0.20.1 diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index bb0d6e1ec5fc1..d2c0ca2921d58 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -701,6 +701,11 @@ def test_check_array_series(): warn_on_dtype=True) assert_array_equal(res, np.array([1, 2, 3])) + # with categorical dtype (not a numpy dtype) (GH12699) + s = pd.Series(['a', 'b', 'c']).astype('category') + res = check_array(s, dtype=None, ensure_2d=False) + assert_array_equal(res, np.array(['a', 'b', 'c'], dtype=object)) + def test_check_dataframe_warns_on_dtype(): # Check that warn_on_dtype also works for DataFrames. diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index ea2606fe6b6eb..72e64d3214000 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -477,7 +477,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True, # check if the object contains several dtypes (typically a pandas # DataFrame), and store them. If not, store None. dtypes_orig = None - if hasattr(array, "dtypes") and len(array.dtypes): + if hasattr(array, "dtypes") and hasattr(array.dtypes, '__array__'): dtypes_orig = np.array(array.dtypes) if dtype_numeric: