From f4754a937548518d63f6fa3320f94ce951a078af Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 17 May 2019 15:21:27 -0400
Subject: [PATCH 01/39] TST Adds tests

---
 sklearn/datasets/openml.py            |  33 +++--
 sklearn/datasets/tests/test_openml.py | 197 +++++++++++++++++++++++++-
 2 files changed, 218 insertions(+), 12 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 6f76ee15e2e40..379fc2f5e93e5 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -436,7 +436,8 @@ def _valid_data_column_names(features_list, target_columns):
 
 
 def fetch_openml(name=None, version='active', data_id=None, data_home=None,
-                 target_column='default-target', cache=True, return_X_y=False):
+                 target_column='default-target', cache=True, return_X_y=False,
+                 return_frame=False):
     """Fetch dataset from openml by name or dataset id.
 
     Datasets are uniquely identified by either an integer ID or by a
@@ -489,24 +490,33 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         If True, returns ``(data, target)`` instead of a Bunch object. See
         below for more information about the `data` and `target` objects.
 
+    return_frame : boolean, default=False
+        If True, returns a Bunch where the data attribute is a pandas
+        DataFrame.
+
     Returns
     -------
 
     data : Bunch
         Dictionary-like object, with attributes:
 
-        data : np.array or scipy.sparse.csr_matrix of floats
+        data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame
             The feature matrix. Categorical features are encoded as ordinals.
-        target : np.array
+            If ``return_frame`` is True, this is a pandas DataFrame.
+        target : np.array or None
             The regression target or classification labels, if applicable.
             Dtype is float if numeric, and object if categorical.
+            If ``return_frame`` is True, this is None.
         DESCR : str
             The full description of the dataset
         feature_names : list
             The names of the dataset columns
-        categories : dict
+        target_names : list
+            The names of the target columns
+        categories : dict or None
             Maps each categorical feature name to a list of values, such
-            that the value encoded as i is ith in the list.
+            that the value encoded as i is ith in the list. If ``return_frame``
+            is True, this is None.
         details : dict
             More metadata from OpenML
 
@@ -571,11 +581,14 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     # download data features, meta-info about column types
     features_list = _get_data_features(data_id, data_home)
 
-    for feature in features_list:
-        if 'true' in (feature['is_ignore'], feature['is_row_identifier']):
-            continue
-        if feature['data_type'] == 'string':
-            raise ValueError('STRING attributes are not yet supported')
+    if not return_frame:
+        for feature in features_list:
+            if 'true' in (feature['is_ignore'], feature['is_row_identifier']):
+                continue
+            if feature['data_type'] == 'string':
+                raise ValueError('STRING attributes are not supported for '
+                                 'arrays as a return value. Try '
+                                 'return_frame=True')
 
     if target_column == "default-target":
         # determines the default target based on the data feature results
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 9c8200731aa6d..a3b7be2604250 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -255,6 +255,198 @@ def _mock_urlopen(request):
         context.setattr(sklearn.datasets.openml, 'urlopen', _mock_urlopen)
 
 
+def test_fetch_openml_iris_pandas(monkeypatch):
+    # classification dataset with numeric only columns
+    pd = pytest.importorskip('pandas')
+    data_id = 61
+    expected_shape = (150, 5)
+
+    cat_dtype = pd.CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
+                                     'Iris-virginica'])
+    expected_dtypes = [np.float64] * 4 + [cat_dtype]
+    expected_feature_names = ['sepallength', 'sepalwidth', 'petallength',
+                              'petalwidth']
+    expected_target_names = ['class']
+    expected_columns = expected_feature_names + expected_target_names
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+
+    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
+    df = bunch.data
+
+    assert np.all(df.dtypes == expected_dtypes)
+    assert df.shape == expected_shape
+    assert np.all(df.columns == expected_columns)
+    assert np.all(bunch.feature_names == expected_feature_names)
+    assert np.all(bunch.target_names == expected_target_names)
+
+
+def test_fetch_openml_anneal_pandas(monkeypatch):
+    # classification dataset with numeric and categorical columns
+    pd = pytest.importorskip('pandas')
+    data_id = 2
+    target_column = 'class'
+    expected_shape = (11, 39)
+    expected_categories = 33
+    expected_floats = 6
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+
+    bunch = fetch_openml(data_id=data_id, return_frame=True,
+                         target_column=target_column, cache=False)
+
+    df = bunch.data
+    assert df.shape == expected_shape
+
+    n_categories = len([dtype for dtype in df.dtypes
+                       if isinstance(dtype, pd.CategoricalDtype)])
+    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
+    assert expected_categories == n_categories
+    assert expected_floats == n_floats
+    assert np.all(bunch.target_names == target_column)
+
+
+def test_fetch_openml_cpu_pandas(monkeypatch):
+    # regression dataset with numeric and categorical columns
+    pd = pytest.importorskip('pandas')
+    data_id = 561
+    expected_shape = (209, 8)
+
+    cat_dtype = pd.CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
+                                     'bti', 'burroughs', 'c.r.d', 'cdc',
+                                     'cambex', 'dec', 'dg', 'formation',
+                                     'four-phase', 'gould', 'hp', 'harris',
+                                     'honeywell', 'ibm', 'ipl', 'magnuson',
+                                     'microdata', 'nas', 'ncr', 'nixdorf',
+                                     'perkin-elmer', 'prime', 'siemens',
+                                     'sperry', 'sratus', 'wang'])
+    expected_dtypes = [cat_dtype] + [np.float64] * 7
+    expected_feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN',
+                              'CHMAX']
+    expected_target_names = ['class']
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
+    df = bunch.data
+
+    assert df.shape == expected_shape
+    assert np.all(df.dtypes == expected_dtypes)
+    assert np.all(df.columns == expected_feature_names + expected_target_names)
+    assert np.all(bunch.feature_names == expected_feature_names)
+    assert np.all(bunch.target_names == expected_target_names)
+
+
+def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
+    data_id = 292
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+
+    msg = ('Cannot return dataframe with sparse data')
+    with pytest.raises(ValueError, match=msg):
+        fetch_openml(data_id=data_id, return_frame=True, cache=False)
+
+
+def test_fetch_openml_adultcensus_pandas(monkeypatch):
+    pd = pytest.importorskip('pandas')
+    # Check because of the numeric row attribute (issue #12329)
+    data_id = 1119
+    expected_shape = (10, 14)
+    expected_categories = 9
+    expected_floats = 7
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
+
+    df = bunch.data
+    assert df.shape == expected_shape
+
+    n_categories = len([dtype for dtype in df.dtypes
+                       if isinstance(dtype, pd.CategoricalDtype)])
+    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
+    assert expected_categories == n_categories
+    assert expected_floats == n_floats
+
+
+def test_fetch_openml_miceprotein_pandas(monkeypatch):
+    # JvR: very important check, as this dataset defined several row ids
+    # and ignore attributes. Note that data_features json has 82 attributes,
+    # and row id (1), ignore attributes (3) have been removed.
+    pd = pytest.importorskip('pandas')
+    data_id = 40966
+    expected_shape = (7, 78)
+    expected_floats = 77
+    expected_categories = 5
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
+
+    df = bunch.data
+    assert df.shape == expected_shape
+
+    n_categories = len([dtype for dtype in df.dtypes
+                       if isinstance(dtype, pd.CategoricalDtype)])
+    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
+    assert expected_categories == n_categories
+    assert expected_floats == n_floats
+
+
+def test_fetch_openml_emotions_pandas(monkeypatch):
+    # classification dataset with multiple targets (natively)
+    pd = pytest.importorskip('pandas')
+
+    data_id = 40589
+    target_column = ['amazed.suprised', 'happy.pleased', 'relaxing.calm',
+                     'quiet.still', 'sad.lonely', 'angry.aggresive']
+    expected_shape = (13, 78)
+    expected_categories = 6
+    expected_floats = 72
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False,
+                         target_column=target_column)
+
+    df = bunch.data
+    assert df.shape == expected_shape
+
+    n_categories = len([dtype for dtype in df.dtypes
+                       if isinstance(dtype, pd.CategoricalDtype)])
+    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
+    assert expected_categories == n_categories
+    assert expected_floats == n_floats
+    assert np.all(bunch.target_column == target_column)
+
+
+def test_fetch_openml_titanic_pandas(monkeypatch):
+    # dataset with strings
+    pd = pytest.importorskip('pandas')
+
+    data_id = 40945
+    expected_shape = (1309, 14)
+    expected_dtypes = [np.float64, pd.CategoricalDtype(['0', '1']),
+                       object, pd.CategoricalDtype(['female', 'male']),
+                       np.float64, np.float64, np.float64, object,
+                       np.float64, object,
+                       pd.CategoricalDtype(['C', 'Q', 'S']), object,
+                       np.float64, object]
+    expected_columns = ['pclass', 'survived', 'name', 'sex', 'age',
+                        'sibsp', 'parch', 'ticket', 'fare', 'cabin',
+                        'embarked', 'boat', 'body', 'home.dest']
+    expected_feature_names = ['pclass', 'name', 'sex', 'age',
+                              'sibsp', 'parch', 'ticket', 'fare', 'cabin',
+                              'embarked', 'boat', 'body', 'home.dest']
+    expected_target_names = ['survived']
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
+
+    df = bunch.data
+    assert df.shape == expected_shape
+    assert np.all(df.dtypes == expected_dtypes)
+    assert np.all(df.columns == expected_columns)
+    assert np.all(bunch.feature_names == expected_feature_names)
+    assert np.all(bunch.target_names == expected_target_names)
+
+
 @pytest.mark.parametrize('gzip_response', [True, False])
 def test_fetch_openml_iris(monkeypatch, gzip_response):
     # classification dataset with numeric only columns
@@ -661,12 +853,13 @@ def test_warn_ignore_attribute(monkeypatch, gzip_response):
 
 
 @pytest.mark.parametrize('gzip_response', [True, False])
-def test_string_attribute(monkeypatch, gzip_response):
+def test_string_attribute_without_dataframe(monkeypatch, gzip_response):
     data_id = 40945
     _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
     # single column test
     assert_raise_message(ValueError,
-                         'STRING attributes are not yet supported',
+                         ('STRING attributes are not supported for arrays as '
+                          'a return value. Try return_frame=True'),
                          fetch_openml, data_id=data_id, cache=False)
 
 
From e67182e51019501b20196bdb816e84ff05908614 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 17 May 2019 18:06:22 -0400
Subject: [PATCH 02/39] ENH Adds support for dataframes in open_ml

---
 .../plot_column_transformer_mixed_types.py    |  7 +-
 sklearn/datasets/openml.py                    | 85 ++++++++++++++--
 sklearn/datasets/tests/test_openml.py         | 96 ++++++++++++++-----
 sklearn/utils/__init__.py                     | 21 ++++
 4 files changed, 170 insertions(+), 39 deletions(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 264ae7495296c..19651cd7cf622 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -24,10 +24,10 @@
 #
 # License: BSD 3 clause
 
-import pandas as pd
 import numpy as np
 
 from sklearn.compose import ColumnTransformer
+from sklearn.datasets import fetch_openml
 from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -37,9 +37,8 @@
 np.random.seed(0)
 
 # Read data from Titanic dataset.
-titanic_url = ('https://raw.githubusercontent.com/amueller/'
-               'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')
-data = pd.read_csv(titanic_url)
+titanic = fetch_openml(data_id=40945, return_frame=True)
+data = titanic.data
 
 # We will train our classifier with the following features:
 # Numeric Features:
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 379fc2f5e93e5..44a0b89c188d4 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -8,6 +8,7 @@
 from functools import wraps
 import itertools
 from collections.abc import Generator
+from collections import OrderedDict
 
 from urllib.request import urlopen, Request
 
@@ -18,6 +19,7 @@
 from .base import get_data_home
 from urllib.error import HTTPError
 from ..utils import Bunch
+from ..utils import check_pandas_support  # noqa
 
 __all__ = ['fetch_openml']
 
@@ -263,6 +265,58 @@ def _convert_arff_data(arff_data, col_slice_x, col_slice_y, shape=None):
         raise ValueError('Unexpected Data Type obtained from arff.')
 
 
+def _feature_to_dtype(feature):
+    """Map feature to dtype for pandas DataFrame
+    """
+    if feature["data_type"] == "string":
+        return object
+    elif feature["data_type"] == "nominal":
+        return 'category'
+    # only numeric, integer, real are left
+    elif (feature["number_of_missing_values"] != "0" or
+          feature["data_type"] in ["numeric", "real"]):
+        return np.float64
+    elif feature["data_type"] == "integer":
+        return np.int64
+    raise ValueError("Unsupported feature: {}".format(feature))
+
+
+def _convert_arff_data_dataframe(arrf_data, all_columns, features_dict):
+    """Convert the ARFF object into a pandas DataFrame.
+
+    Parameters
+    ----------
+    arff_data : list or dict
+        as obtained from liac-arff object
+
+    all_columns : list
+        columns to return
+
+    features_dict : OrderedDict
+        map from feature name to feature info from openml. Its keys, in
+        order, are used as the column names of the ARFF data.
+
+    Returns
+    -------
+    df : pd.DataFrame
+    """
+    check_pandas_support('fetch_openml with return_frame=True')
+    import pandas as pd
+
+    df = pd.DataFrame(arrf_data['data'], columns=list(features_dict.keys()),
+                      dtype=object)
+    df = df[all_columns].copy()
+
+    dtypes = {}
+    for column in all_columns:
+        dtype = _feature_to_dtype(features_dict[column])
+        if dtype == object:
+            continue
+        dtypes[column] = dtype
+
+    return df.astype(dtypes)
+
+
 def _get_data_info_by_name(name, version, data_home):
     """
     Utilizes the openml dataset listing api to find a dataset by
@@ -578,6 +632,13 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         warn("OpenML raised a warning on the dataset. It might be "
              "unusable. Warning: {}".format(data_description['warning']))
 
+    return_sparse = False
+    if data_description['format'].lower() == 'sparse_arff':
+        return_sparse = True
+
+    if return_sparse and return_frame:
+        raise ValueError('Cannot return dataframe with sparse data')
+
     # download data features, meta-info about column types
     features_list = _get_data_features(data_id, data_home)
 
@@ -609,7 +670,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                                             target_column)
 
     # prepare which columns and data types should be returned for the X and y
-    features_dict = {feature['name']: feature for feature in features_list}
+    features_dict = OrderedDict([(feature['name'], feature)
+                                for feature in features_list])
 
     # XXX: col_slice_y should be all nominal or all numeric
     _verify_target_data_type(features_dict, target_column)
@@ -628,10 +690,6 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                              'columns. '.format(feat['name'], nr_missing))
 
     # determine arff encoding to return
-    return_sparse = False
-    if data_description['format'].lower() == 'sparse_arff':
-        return_sparse = True
-
     if not return_sparse:
         data_qualities = _get_data_qualities(data_id, data_home)
         shape = _get_data_shape(data_qualities)
@@ -644,7 +702,18 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     # obtain the data
     arff = _download_data_arff(data_description['file_id'], return_sparse,
-                               data_home)
+                               data_home, encode_nominal=not return_frame)
+
+    description = "{}\n\nDownloaded from openml.org.".format(
+        data_description.pop('description'))
+
+    if return_frame:
+        all_columns = data_columns + target_column
+        df = _convert_arff_data_dataframe(arff, all_columns, features_dict)
+        return Bunch(data=df, target=None, feature_names=data_columns,
+                     target_names=target_column, DESCR=description,
+                     details=data_description, categories=None,
+                     url="https://www.openml.org/d/{}".format(data_id))
 
     # nominal attributes is a dict mapping from the attribute name to the
     # possible values. Includes also the target column (which will be popped
@@ -669,9 +738,6 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         raise ValueError('Mix of nominal and non-nominal targets is not '
                          'currently supported')
 
-    description = "{}\n\nDownloaded from openml.org.".format(
-        data_description.pop('description'))
-
     # reshape y back to 1-D array, if there is only 1 target column; back
     # to None if there are not target columns
     if y.shape[1] == 1:
@@ -684,6 +750,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     bunch = Bunch(
         data=X, target=y, feature_names=data_columns,
+        target_names=target_column,
         DESCR=description, details=data_description,
         categories=nominal_attributes,
         url="https://www.openml.org/d/{}".format(data_id))
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index a3b7be2604250..034d78f144753 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -14,7 +14,8 @@
                                      _get_data_description_by_id,
                                      _download_data_arff,
                                      _get_local_path,
-                                     _retry_with_clean_cache)
+                                     _retry_with_clean_cache,
+                                     _feature_to_dtype)
 from sklearn.utils.testing import (assert_warns_message,
                                    assert_raise_message)
 from sklearn.utils import is_scalar_nan
@@ -93,10 +94,12 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version,
     if isinstance(target_column, str):
         # single target, so target is vector
         assert data_by_id.target.shape == (expected_observations, )
+        assert data_by_id.target_names[0] == target_column
     elif isinstance(target_column, list):
         # multi target, so target is array
         assert data_by_id.target.shape == (expected_observations,
                                            len(target_column))
+        assert np.all(data_by_id.target_names == target_column)
     assert data_by_id.data.dtype == np.float64
     assert data_by_id.target.dtype == expected_target_dtype
     assert len(data_by_id.feature_names) == expected_features
@@ -255,6 +258,31 @@ def _mock_urlopen(request):
         context.setattr(sklearn.datasets.openml, 'urlopen', _mock_urlopen)
 
 
+@pytest.mark.parametrize('feature, expected_dtype', [
+    ({'data_type': 'string', 'number_of_missing_values': '0'}, object),
+    ({'data_type': 'string', 'number_of_missing_values': '1'}, object),
+    ({'data_type': 'numeric', 'number_of_missing_values': '0'}, np.float64),
+    ({'data_type': 'numeric', 'number_of_missing_values': '1'}, np.float64),
+    ({'data_type': 'real', 'number_of_missing_values': '0'}, np.float64),
+    ({'data_type': 'real', 'number_of_missing_values': '1'}, np.float64),
+    ({'data_type': 'integer', 'number_of_missing_values': '0'}, np.int64),
+    ({'data_type': 'integer', 'number_of_missing_values': '1'}, np.float64),
+    ({'data_type': 'nominal', 'number_of_missing_values': '0'}, 'category'),
+    ({'data_type': 'nominal', 'number_of_missing_values': '1'}, 'category'),
+])
+def test_feature_to_dtype(feature, expected_dtype):
+    assert _feature_to_dtype(feature) == expected_dtype
+
+
+@pytest.mark.parametrize('feature', [
+    {'data_type': 'datatime', 'number_of_missing_values': '0'}
+])
+def test_feature_to_dtype_error(feature):
+    msg = 'Unsupported feature: {}'.format(feature)
+    with pytest.raises(ValueError, match=msg):
+        _feature_to_dtype(feature)
+
+
 def test_fetch_openml_iris_pandas(monkeypatch):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')
@@ -274,6 +302,7 @@ def test_fetch_openml_iris_pandas(monkeypatch):
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
     df = bunch.data
 
+    assert isinstance(df, pd.DataFrame)
     assert np.all(df.dtypes == expected_dtypes)
     assert df.shape == expected_shape
     assert np.all(df.columns == expected_columns)
@@ -294,16 +323,16 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
 
     bunch = fetch_openml(data_id=data_id, return_frame=True,
                          target_column=target_column, cache=False)
-
     df = bunch.data
-    assert df.shape == expected_shape
 
+    assert isinstance(df, pd.DataFrame)
+    assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
                        if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
-    assert np.all(bunch.target_names == target_column)
+    assert np.all(bunch.target_names == [target_column])
 
 
 def test_fetch_openml_cpu_pandas(monkeypatch):
@@ -329,6 +358,7 @@ def test_fetch_openml_cpu_pandas(monkeypatch):
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
     df = bunch.data
 
+    assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     assert np.all(df.dtypes == expected_dtypes)
     assert np.all(df.columns == expected_feature_names + expected_target_names)
@@ -341,7 +371,7 @@ def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
-    msg = ('Cannot return dataframe with sparse data')
+    msg = 'Cannot return dataframe with sparse data'
     with pytest.raises(ValueError, match=msg):
         fetch_openml(data_id=data_id, return_frame=True, cache=False)
 
@@ -350,16 +380,16 @@ def test_fetch_openml_adultcensus_pandas(monkeypatch):
     pd = pytest.importorskip('pandas')
     # Check because of the numeric row attribute (issue #12329)
     data_id = 1119
-    expected_shape = (10, 14)
+    expected_shape = (10, 15)
     expected_categories = 9
-    expected_floats = 7
+    expected_floats = 6
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-
     df = bunch.data
-    assert df.shape == expected_shape
 
+    assert isinstance(df, pd.DataFrame)
+    assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
                        if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
@@ -375,14 +405,15 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     data_id = 40966
     expected_shape = (7, 78)
     expected_floats = 77
-    expected_categories = 5
+    expected_categories = 1
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
 
     df = bunch.data
-    assert df.shape == expected_shape
 
+    assert isinstance(df, pd.DataFrame)
+    assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
                        if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
@@ -406,14 +437,15 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
                          target_column=target_column)
 
     df = bunch.data
-    assert df.shape == expected_shape
 
+    assert isinstance(df, pd.DataFrame)
+    assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
                        if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
-    assert np.all(bunch.target_column == target_column)
+    assert np.all(bunch.target_names == target_column)
 
 
 def test_fetch_openml_titanic_pandas(monkeypatch):
@@ -422,24 +454,36 @@ def test_fetch_openml_titanic_pandas(monkeypatch):
 
     data_id = 40945
     expected_shape = (1309, 14)
-    expected_dtypes = [np.float64, pd.CategoricalDtype(['0', '1']),
-                       object, pd.CategoricalDtype(['female', 'male']),
-                       np.float64, np.float64, np.float64, object,
-                       np.float64, object,
-                       pd.CategoricalDtype(['C', 'Q', 'S']), object,
-                       np.float64, object]
-    expected_columns = ['pclass', 'survived', 'name', 'sex', 'age',
-                        'sibsp', 'parch', 'ticket', 'fare', 'cabin',
-                        'embarked', 'boat', 'body', 'home.dest']
-    expected_feature_names = ['pclass', 'name', 'sex', 'age',
-                              'sibsp', 'parch', 'ticket', 'fare', 'cabin',
-                              'embarked', 'boat', 'body', 'home.dest']
+    name_to_dtype = {
+        'pclass': np.float64,
+        'name': object,
+        'sex': pd.CategoricalDtype(['female', 'male']),
+        'age': np.float64,
+        'sibsp': np.float64,
+        'parch': np.float64,
+        'ticket': object,
+        'fare': np.float64,
+        'cabin': object,
+        'embarked': pd.CategoricalDtype(['C', 'Q', 'S']),
+        'boat': object,
+        'body': np.float64,
+        'home.dest': object,
+        'survived': pd.CategoricalDtype(['0', '1'])
+    }
+    expected_columns = ['pclass', 'name', 'sex', 'age', 'sibsp',
+                        'parch', 'ticket', 'fare', 'cabin', 'embarked',
+                        'boat', 'body', 'home.dest', 'survived']
+    expected_dtypes = [name_to_dtype[col] for col in expected_columns]
+    expected_feature_names = ['pclass', 'name', 'sex', 'age', 'sibsp',
+                              'parch', 'ticket', 'fare', 'cabin', 'embarked',
+                              'boat', 'body', 'home.dest']
     expected_target_names = ['survived']
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-
     df = bunch.data
+
+    assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     assert np.all(df.dtypes == expected_dtypes)
     assert np.all(df.columns == expected_columns)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index ea56498cac7c5..8d9e55f5e6df1 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -824,3 +824,24 @@ def check_matplotlib_support(caller_name):
             "{} requires matplotlib. You can install matplotlib with "
             "`pip install matplotlib`".format(caller_name)
         ) from e
+
+
+def check_pandas_support(caller_name):
+    """Raise ImportError with detailed error message if pandas is not
+    installed.
+
+    Utilities like :func:`fetch_openml` should lazily import
+    pandas and call this helper before any computation.
+
+    Parameters
+    ----------
+    caller_name : str
+        The name of the caller that requires pandas.
+    """
+    try:
+        import pandas  # noqa
+    except ImportError as e:
+        raise ImportError(
+            "{} requires pandas. You can install pandas with "
+            "`pip install pandas`".format(caller_name)
+        ) from e

From 98bfa765f52b1a1a4415264386d65578ec33e07e Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 17 May 2019 23:56:13 -0400
Subject: [PATCH 03/39] BUG Add datafiles

---
 .../api-v1-json-data-qualities-40945.json.gz    | Bin 0 -> 1042 bytes
 .../40945/data-v1-download-16826755.arff.gz     | Bin 0 -> 32243 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 sklearn/datasets/tests/data/openml/40945/api-v1-json-data-qualities-40945.json.gz
 create mode 100644 sklearn/datasets/tests/data/openml/40945/data-v1-download-16826755.arff.gz

diff --git a/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-qualities-40945.json.gz b/sklearn/datasets/tests/data/openml/40945/api-v1-json-data-qualities-40945.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..279a0bd82ad663e5c09d2fa7ef448472ee82b205
GIT binary patch
literal 1042
zcmV+t1nv7DiwFq*Sb1Ck17UD!Ep{<2YIARHEo5PIVJ&fWVQgu1X=QUQG%z_dH7;s%
zZ*BmcSKDseMil%PJ}1Pv?A32+oB}~%r>%<ip(qO4p<T8T+3@1VHT>@xCXFgmya+M`
z1BQ2Ich8(NJ9<jqXKnWThc4R|ZIL(0#Z&U-kH_TV*Qcb+c6oA<yz1KObye5-HfxKj
zOkN}p*|zKdL#ZPtE;1{d7>wuS`NbahdYd)Po3gE|zaIynXrmEfPr*rGdx*I#c6r%g
zmoKL{L=lsVmV;+!y|?BN_U1!ZU}avm*WK<WuixFhYTM=j9#Yjn#z}N0lF9&mmHk=O
z*zWUzzt8Ji@MQP-0o-vKNFqKys#oh~;hW;$Ltvno(s1!d9IY;Hx;8(q4X13?s90~v
zJI~RMu-|ocTQx=V?hY5t>*98FPw^><kHM*l#<26FlYYF)vvL;Aj&RQy8CSH5?Z@D*
zMSHw0@2Yxt+zUe1k_leC5QP!+y4n>G6&qd_O<R<=?aw{%aCbeb)aI}JpE7S|&udb3
z-cw{XED~jW*-iWYGJlvuN1wXrqHv6kNC&;!muXQa^b!iB>cs^g)Q`byLNO?mU5KL5
z+UQ1S2g9OL>eJvT1QAEJKgAJxQ7UFj&e~wCkc2Vzx+<Fdr)pE&L$M!w^k9-GDh81-
zgvMDu+W7>f1=BeAv15`o<vEQ`;U3mlL&3mIju;lT$DDYFc{WVg-Z2dgp;!_h2BuLN
zeuO5#=0Qf-HSJ$=st~|KK7(nuM*N6jQF}@?SQ6*eN9Q9OJ%TTak6WYN;Djm|*Aet2
z^=JfIQ|l$A7M_InfzfWH(c3Y@qFQhz#gIAT5IwPs(JSt)A!Xq3k^Z?dItw4Uw<Q|h
z_3P{M4Qf<Bew$})S1;A(>-x{+r!1VHWHY_=ZP~P0c{^OI<M-xh^)E&H$5qiZMR|Xu
z>^yu9fljp3i2szFxR9F$Y#w(v)pimwdKvx~c|i%34ff#K?0?TuoK~7~D;zS+fuuKi
ziwA<xdHtI@dxrUH8A*rNFmoIW1;jzjpc(_m_@T?{w%F!;w(0E|POv`YMYI`48Qv9#
zNor5C;(x~X9wLaGJ(2nuk~3X!PjG^*Q6Lo5IU5<iCUKf)&D<=4Fmect&46lB8#z0T
z>={-g39f^HP2>d$5Vf(BG%E=bB?*Xsg!c`~OrPZ3aIz;@QwHv;C<Edsj$up)W}0SE
z7(@i|)<=*Co-eCY9C~(|-!rUcdPyVW+3aZ)nPsx3x+sgYDFYn>MlM+4$$`;x<Ncms
zO%_e@@>4{cqsJs7cumtR%HlLdY!ZZLbb5f79dZEra6*JZ9`W#F>c6`Ngl~)c{Pz6(
M3AdqM{SglU0Nlj@%K!iX

literal 0
HcmV?d00001

diff --git a/sklearn/datasets/tests/data/openml/40945/data-v1-download-16826755.arff.gz b/sklearn/datasets/tests/data/openml/40945/data-v1-download-16826755.arff.gz
new file mode 100644
index 0000000000000000000000000000000000000000..824fd370dd582102afba8a87ac98e40e6ae96cb9
GIT binary patch
literal 32243
zcmV(vK<d9AiwFpBTX|dn17u-zVJ&tsEo5(ZZftL1WGyi^I5IXjH8n0_a%N@#tXtbt
z+{Uwh&tIY91yV&6-7imsz1ZM@jln0boVQ_j*wqiMv}YuN*Qd_^e!AzPnbB&2*r}8&
zacsJ0diwHx-K{?uElFF{m2<p2{^bwSwye1CTIw8c7bR($W2fpilobo>&zg=s6;E_|
z?EJ5H>i?hhXBFAdW2b@NS1bF6=r{QL#}9NvN_u>H%>QNoELqV53XA(@JN`#o!WQfC
zKWdA`BW*_y^FY`ERSR-oRHHvbH}`}+@>?HI;R2uG*WKU-_ca+;cwaAf;|kaHhPq4I
z=%4%Lk0oizFaD{2dURfvv^sUJi>7g%d08~%p0;b}D=FK$IyyZP<~%wL@>37~ISL{_
z@!+?<?}lL-pB_zO_*-}ID>ynmga2xstGX*15c%n-S5Y)IDj`ivnd@A1OZdI>h0&*)
z!A}Yeo}2qg3O^8>9{GNlX8zHsANy_$N~S^JOoK#f@_nxAx~^JANqOqre0F|!rfZ7g
zp9;9qsq^V0YWGa-{E8P8?h8j`q^PKQzc(m8`w^}Bz8~sl-4SQXXt7!g>k8eW_a^0j
z82^C!M(cN`&dG!}O+oQ}m$fq^RYA+<SJMvhzi0@2{Ax*=wgh2p*WWwJgirZ-9)tG;
z3D-3D1NqJ&f98+$+4;L>kEhPZzqL?CpBwe2f7Yy`WkH-TYg#YsrXa?JlJJz91gz(Q
z;52pfI15jYW~uTfkE;<zQc65i2^{<jL=kD6587Qr84$i7crl2;p9oF+OlbK44i6G9
z)v>qRlD7S`%$LuEE#VPL>YPm1gq3v*0eeL@a1irN;t*bht4fm`e(SqYn!_7Seb1Md
znDIMI$gU}tVE)e?rB!2IhP7lxp6ISEh`7-$*%&UE`r<Y>QwNlIpe4%P*oy)bJ4N9^
zAfB8)frk3j`7hS3I<iypo3>`B&a^IF@bfh|8(Gx%`lVCx2wZ#`gXx1Hb>jdXD*?w!
zq9dus52EoW7YexHv;ybknk*rrA(ovlq}-6@iaPIVwxFi{{OoPokD`MP*FkEAr@V$#
z>ReGU18Te>T{*}ikzvLt1x-org0oG+SgOdmiW|Z(MZROCT2QHDLfAcJIu&D{I{ru@
zlIgLjB2ht;q5L#+vzRAd_&W)Ay<XhobIsbdc<%FZMOg(|;DT0c=X@>7#(4hBli59m
zC+7+2=sXCeZ%sseK;pp2M$0tv)hDFekV?ijf0bD;^31SA=x3N&+&oZ6JsUGLm-wya
zMgW1IyJEG1C;>lR_F}n(eJMANQEr59>19}oGG~I-Nx0G=NA+e_8qkvfOf=33FACI|
z(JRs|Dda>(A)k?L*IHrdy-~GzhjHcKzeCPdk9=MN7BG`l7^tL@`C$Guaf94XA-sL%
zKbd-BG!}(2Ur?Q)%qI<%pM*R;rLha8J4KI|lFs=dzZO+hZ0QPwsFXdSi^u0eInHgd
zSYw((uZMqY`54kZPf9Ulp*Vo#4e8=&7AimxABrYZL9?g9Ik_X~^bXi>x24t2e1qs`
zw63LN{Ryeoxu~BZwzOX<b3x(5B#R+1^`lH0>#)>fSr>gJx~&@mM>h`~7;#|bo--ro
zsF*F-(n<g$08FHoEJQ)Rn~Fk^t72xkN5QIlh)@}x<k3jTpkI1N?#2A}D$m26K@_P(
zaw?hy<ofA(f7@nVZ-u}apx>fGU~yrJ6~)%i6F5@tyB<H_B+8{BQ*Ovv{Y+K`S%9R=
zJ<*LNTSliuq*hwbe@0nDq1g6;xIz4224ZYJ8wNJu2R8}B5UrgLt&Jl811JOlcD?!S
zuB8tTq-uKDb4@x{wBG^#JQ5wDfr3B27h?&M_-7g`{4<k?x?Vi4>k{7a?wLY*0<iU!
zG;2We981imDtDFy;tHHvD1}pO*JwBjrAt?0_3()ABD|;zI=>jW1z>r_1%LdYtW}cg
zQ35!u9$J{!W1s+wQHZ57^eyYZ$H%)ZpfH;f>#7n5<>sFfRu3v^C`Cgs24H4CiQFtn
zFn?LDe)M<URV%;-&v4*R7vtiA)_mu*#!Tj!VTVu~-}gdGJ_OO@l3?MRS1Yi?(wUa@
ziQ<2O!gWzA<JXMd_hym^1&WKdRiE0uD9d|=vmh{Re+8wk<`1mX@}$4ivu0}V8Cg*0
zZUKnVOg5(1CLUI(Byf?YnaSp8TGv|u<-%w>lYx8&<y>10iNbtYSau5nQ3QR&(~vnh
zVh@$glAZ&KT;BXvSHIm+qOGS)h6)*3mUJg9_MX-Zu-nP8{0{)E>yA0_cXr^6$G;A7
z1?1@{NR=5S{t@8ns&1U~Y6V%NQK`!l+IGh4Oekv^+35H2dx!913=9tg<2>+<=eYqY
z&1#VH)sPAVr;sdRzA1D?QS3&*g&eDu=SDz2;HUf^q;T}`X+zdvqmT1mDt!izn;~u!
zb^x^dXjb>$3PG4+waQ(c!SSH$qC{Z4T_fDT>C-pkPi{aT;G~cx5Js~VVYGF=)Bs7W
zifcmJ1P>1ND;1ze;b!sdGg>_%vYLJxQ-?Oc?ijo$kR2KCz;&uo!jr$1@X!*_Ap-OM
zirwN-#y+YAoC}G41BDr|b?cnWHya}I0pw7&G?gZOGJ6qw9kgx7s%9_aILff5MU|%|
zd6HmZiO|#LkwTB5S;M;)Z%3fGpf4=e#XT^0ajj~#HvKm?{MYOA%OuPU2Og~(Q&Nbd
zbDBWEp+~cQPXkBGc#78uUY5Qo9Eq|uB}*9wg4o_r$YEUBUJ{i2DB;HpGVCMGgXlE`
zH^q}tn>|Z(Fohri<CK8wpdL{6P}`3S6xz&;3HRbCf?zx|U3lL<TrFGD@5E{Z%ous3
z__#;}zuTbO@6jISM;{V!z+p-3JCitsU$Obnz%8*YM}EVD6QE|lLoUQTEYkA<Z!^x+
z$1*tdeaGPJ{khDx0=j!i#lbDhWdHoguN{y(6)QCWLM+)hpUV0{n@YY&lB3F@t`{gG
zNM@wdaydqIu18=M1OhfPQrGm^7n&HRWK4|U!An;6f*~3Fh+y$rf^*6SsLD#<OboG>
zb9t$?___hdac6*OAWxi0E+bi1@d?}EOFuK)En3|}xat-KJo!ysKANyO`Zp99x0thv
zHmw;0ugRajL4Gq-;G7uuF+KPV3J+X}zj-4GG!Q~eD7<Ltgr&NCc^*WHC#eTN2gif#
z3`pV;(gvdty0`U0^g*q(1!Uu%_SX^}7**IA6s3}qs%8-P75ERJOXpqPEP(=<w^VsJ
zO;e)=pS?s+`SqS?wFGJ<v)7J6ZB&M}pKRDuI+{kh^_+rzuZp$>^M9&<l@YA4cP)8%
zX!=_knwp_1=j20&9oly#;N%LSYe4~_nHNl7K7`EBFB}4|yN2IT^`MmTqW`V~Dy<7z
zB=vlC=kAT`#Ayf*oA|EJt59g-J)ngZX0?wqbxAG187kngh5=-r*1!X?u?JwPUe)z-
zuYvKZcImKV`Zu6b7VD2SreR=Do9(^T%SaV54l^uuaR`a(Xs&``-&w$MFWWU)5YuZ7
zRn49*93+4P6CfhRYGq9j!tmeG-YrRG3~v4^7H!Q2Br!R}caA2BqM>GkWxA$}?RpXg
zg=^Q0kWS!eKC<vEL1Z>bZOSP~T2E>E6VoRqJr5hhkP0?(#xz3C;vBO{?gG9%x4RW%
zt>G!azKZmq_Jor%UghBh6mJ_Qi+_Qn@rc5{Oevd4xAhX|i;oIVPvNZFVvLq}*q-yu
zY<X1xJJ{BBsrCHs$RUYRRuU!|G`4#nrmhPsc6-l;dtp}~OjHLf%T-+&XMnyTy%~si
zPW~#`s(?^)t`{@j7qmuV$!khwxE@kV$&)V1=Vh_5Mh1Ofz#6!nnWN4#WuD>zfvv@u
zKX*m7*zLc77bW=eaLjW*GIH8oTX%f?s7MnEuFIZ?EATF;uJ&FDe&Up~?%0h5YL88N
zA!8IgY0`t|db4eH6yFg>oLi`di-py0%SBsPf>7|#46+8F)6M&Co-eHo0QMQOs%j^B
zrV^9FIA6#P%8fO&OAqVeZMWWo21=}(ux3OHQs|wemX}CDn%Z`w1c`KT;EwUjcjW4s
z!E06gNsmD7<x^1+HQ(N+)NxO#Iev&S^%#6QIEWw^dFX9B+Bq>9qHlL(xGisMvRI2A
z+U3VyYTev}$4$w$?S1aOxJ)lN^~7s^o&SD=l``gY%A<*HnFUD>xVtEyN%x3l`KxO1
zD*1Bg&kJ+w+zx9;a9FrZ$1e&uc<6ie5L41Zk-_GMZUO*O0Zd4i19y;<j3D=X&DZ-a
zhXeZ2c-;m_YGXc$Xr*aC&y9&^jQ#4=2cFHl*I8`%`54RA(i>a6bJD}N2`~FVV91U#
ztNLfJYd$<@osm+ep&N?%c?S4Lw@_oVMHd?j1FAm9j_d#y8|0#tb-i3%bWk;nhmHiM
zT6&=AUFN9<;F-CJaDa=;EebQmnH^I_bjK6CIk1@#V2?d#R^(7WsxM6Y&YilBAEemj
z!Tdf+P0Ttnbu{HRTCj8S334^!=$*X+BA0Lu>XGIc(*)Q+WnpgR^Cha-y8<?Uz{GKi
z4)0k~r=-L-$f<Ka83=6#QlV^9s3z~Rh64(*NB-%LD609CV#eWzY6NNT3q`<&VS+ui
z%m;pg$@@H1jVoUWJ~TX&;`=7Rz6x0(4r_?tq(A^1CEYo>?H~vJRV)#PST!Mf?M5%?
zLw^M%QIf_2Y%_qArB30oXRcl#CilI>jri(CmZ(IO-~^Y_?HV~GPNH$2h%O0pZpjv0
z7Yc{IW3RKp{tuOalsCF?me0$u9O#Q#|DJFE+zj77fpu-vz9uDcW?efNRz}(DhLA=0
zt(NE>9-Q}3b1<;4fXTSBLsH`M0s!+CVuUz%9o8d++rK&|$8cun8gL^KBbDLlaI7B7
zaRT5*buQoFJ#ZjSQN~VjSAZi->t<8q+C~QmA2pf^`>u*MMucS{PTZWg9M1#q6{YBS
zNVt{X$p!-yWxS})=b<?;k2A05TYjee&d0KJAgj*_tC}@=mhU{NyHe!L5_k;2<96s-
zq2gRAPNRElH;vVlL>6>NUj?R}Lwa%TtS=^dQi4mB7WXyBOM|7R=+#0y@4!9A#uZ)D
za2#Rf2r-9^L{}!@W7t{$PO7(7*N=EogI3I+h%pl3oE1>4eG8Mp$JqV|eK$#kCzzCO
z1ev}={tG!y<DelV9p9BO`pdE{@UEmQTCHiZ>Fdk`WESV-9O@+83Eq076(=E1CeDly
z=gdTIBUT4J8*yN6l9Aa^-{p&N@CYo<&7|OAE8|_cvo^0T*>eHzd3?!$2ds=>vrNpZ
zA>gJ^&Rp!R8S#iaa|`kv06sAk21OHE{fEE}Al*QEYKhTv(yfz_Q-5qw@_D-NzYI4)
zLEBY&69eh-=)^vVPD9@2&2ur|!r%%*XBx!tYp~5ZGCv<EnnSiIc|X!5qCxhrD!GPB
zl%g8hsp7|#c%~9ex3ci<PwxEu4QkHbq5~ZDu@}pUOY|dRE&DJL0FSa_W8<Y;K@A;@
zWM5bH(rX}SAuaAu_A?RE0cPayGSc`PDM<6ss)7=#qhw5X)=&^i)Ows3r<q>(2+eNH
zSK71-B=Ncj2MPF_K7R*swnqFhQ`<Lt-TT-R)Lu^LF>i#SoAeap;5^k7kJq?t2wC9t
z(L`{OYUcuKiH+g^te;oEQGPQxKTzcoyFnMUfI<to*RH2$21NL0mx6-1SnGz{1zkQD
zE17Je23g@A2*mA&agvFhXB?U5*3JIA1BAK8?W>~wE}+y<7p{D*xs&l|`L^L#)b%Kl
zoeb22RWx_`o|^v?dKy!aUf@_0XAYJGvlqk>e|rz;{op~k+wZKc(0yMIJOtGq-*GWE
zn^f3zEl!1~T1i~|PY0>%f)bwTjUJui;h~xzOntSFEV`JBqU}iAqYdAd-3){SGA>F;
zGGyo6P<!kY#xJW=TX+M79!|mKJ>)1)uR%tAE5Q#DCxsc`@JRX_1XvsI00~wVJ=XpA
zD@<2>lqm1?o)#;kb-WoYmxg-hBgV7iS2?`sh88%Jh~a&4Z_HC>H3DXg#;QQDGmHvf
zZWm>^j^joOf*iy)y>v8wzsf6D<ee!1XgX*~@`1=dQiDb>R%KBOyNh<SZs3@kN^u_c
z?yZTYwnbeoX?0JTM2~l5(?O-a+M_5?4q}j$<*tukLjZgXtXQ$eH9%9wEMw_vv@#sz
zKx)Dv8o95Zn@2K|0DtxXo;X0WSZ|7DYmE*j{cWw!KT^K_7D2K_edn*V3vm+>chLE#
zA9_RS@-MS-q8ngX#(6YMW0iC!hgWYvI8@XjUfx?u^v!Q^Lb1F(iR{ma-ZM(8XP(!l
zUzA~#cx@BW&eh4t+>VK}0NZz=8zr1(<$d4Hhln2oAX-6VZGCLExZ5n}cpth7mz@tT
z<qXyOJ!(I2GhhC{hk}+%n<Wo>_lmX3htKhQDK``!Nco;K^Odt5j(ba!h<%559Gw$f
z9EP$5b%@nnL&t*u2NmPvbt{9kX>y`NI60IL0%wv%FT3>xE?6<kHO~2hnX%HVx7|53
zieoq8pIFRe8yXF1l_kE}lkc3U+zjQxp6mXc#;v-Lz+rbKrCW0`!fM^80pEd4IS8Kd
z;jvx-8#mZ?u;L;W065v<WQu$P=x_Nz{=*7Au8^sOVG<!-hRwlLg3E`ZzSxk2i)l=|
z;L10G8*6D^H9^vl_jV#qZZi{-jN3%YZuW#bCzlL+lwW~Glf`-rk{nVeA;}?|^b>?3
zQ83o^`2rhXlj_-s?|Z5`UXZ3C_tGY0vmwU!SE(6P_M7#S+R#897v%Y|(Nk;LFSU2$
z)k7tDmWk{=i|ictXTi7pvB%gkMt8k|cg<dPn9#+#X=!0m?nqtz<?h_5O?Qp$Vb`?e
zpSkJ^7fVzfq-cXY<TN75m1#pS9c?8m(apYW#g0CRCE5ls0$r`F8WFst^uUY;@6+$;
ziY{@zU7JZjuOoOk6o8;XvA#KC>U*-+#c~OU7JL;&bB9w;t=6OLZ$@;73*m6=yBV))
z^T_&kmKDPtg^OZ~RdTR;Xjki4;^72gMvRlvFdsWN1mQ-mPs@iWwp)S)RJhUy3Y6+S
zmdb8v;%z@sh}93Vg&(JGE<QO@J!SQU#kyFK6~NTXt33_8Mq)QtO|woemq68O>;pUR
ziUxV!-U9v)Dy>)azOEGj;eEm>b~Pa#%g>uFk5bdQ;iuB-^$&h4eNmTB#%JDoIpPz5
zTFbc<*Gh4BCJy?13_xo%gPMYyJ;}0pbj~@(m-@LH45Q-%`s60psKQNOCGlf*F~OL_
zZ=R?*!qC*qkAlS5df=TRQH3w7C5~XWK;9d*`@mJ~TD88c##cb)IfgQUr`(3jBZ!k6
zojE_8|G}wo6LQ0M5PE#8HK~9B|61=a_sjh`j}N_*TyMqu2L}jBbTf7f=i~!^*n#X8
zzsA1bjXuf`-wf1WE{OhAl7GE7HXA`+j3p|=N*9PX@4u0T|99CK>;g_Z{-v1N8;o&!
z00buW#h}F;QP@mu#AcVxhV-8x@$YUXtXmixf+mfTU6dHGY8=5YwH7j!8xC@rPQLVq
zx}t+BHD$ChKN(VRILYlrW^SDRQ;k#SFXcUaW4AJq5x%8Swqv8`9|@uV-`kfp$898A
z{}qH@#2X>g*wOvO;3iQLDRI~uDQ4b4Hb_E^1~@>2bnDlDm04TXf)**+-;KCqTXtsw
zg{oXno_uwhhmOV-@;)+eP?c*S@+G(curd>=3<b+#Uf6+@)NTy48uU;Qak{y$0L%if
z`yC~PloVqV)_8kcHXYGoOzc*7EXYHBrFT%H7ocBL*{&0#No>)L8(Dg1;dqBlf$HGu
zHH@2cg_^AHT7L$>znsuwKUuI|Uzf5c6nC8+%?9~Ee2Te%_*u7AN79`TaYZ)}t87Xi
z9Pz=Pw4sn1XMZ+W5#TSZ37@GcuXQ8?&{vPgAI3W6&LU=-bHD8Rjp*OJlCRNMBeT1@
z_i<fY8fAk1=76B*gu{Ez{401o?B2s_*52E)Z<(gCVjtxPWr7?5o#@B}8++8qd3Dt(
zXd8!NznUmz7(jyq0y0p~I*h#vbJ-<gRxbG97&&im5XTPM4QuDslLV4NAu@R9w>1#n
zn>`E%9v24W)y&#M?(7fvJ1Mzd@u$5N=9y~&_Fvqw*nk2JVztXf_R57OI=$6$i`iP(
z%PqZgU<_dH6J**K^<7^BKz1-Q8ky2yaRP{Ot3^Yit|u3)Xe6_RE`sh6O||WP3PThl
zP9p_<AhGq+<RVf&-csI9a|VqkvhI&BTy1$=X8DA_wrGgjUugS_8o?9C&338HT|6f$
zKN4}DZaD#KvUq?HUuKu2(gC>~uPl0@s|{RUaBfxxv|8c?ZGToOm??Mm92yPo3`@-%
zQLKY_>fJMV_%z!$PL_%_d>5GAIYK@FtXuT4Gox`B0=g*8B}QpR=uN~Wz9^TqZg%I%
zB<T1PzOqr@jzrq+r8Hh4EOR&3vcmyG2|HJ+yC>3o&fS6IC^X|bcKl5Xa#1p?r^>S&
zoLSnZK6HCBd>jny;;`6XmNPHws_!dX|A0@RdM}RC!U+F;r<e(Sl!aan^@Lu^)-1HE
zFr!c<q?3etLY`xw1rL^F5!UeRF({ukl-mHVQ1<^D(Uki5jV9KY$K3k}TP(&mO(jeT
zc<t;1K|ICLV$67dh2VijVFqN(q&k8WwPcK9?z{=2JF1$ydRzyACmV5bb6nou%Ce#;
zqzka2<GN09*)Wd|Oww2Ot8T<zi=$&ApS_lpU#~H-s!d!b6GPMa2yD$d>3Y@Kr_b)q
zJ7}hm)l6u@<p)^b2@`yi7!A+z%89e*Z=}7bw$ebKF7H^#%N3H=qBLup18{j3??;7c
zSACye${kF>!QOer9wb<lt6{Pp?=B-gwI_duJvJ)f#Se@10sVRNOzUaQjB%<v*JI`_
zy*e$K(K_mZN#v;{-vO6s*-dE$bl0C7N>kAQF-JlX3_GFf`08J{LV}o=b{Q<k;2gMO
z6xv@27j@IJ9=5?L^|&NL!XsHm&{!o2jv)N=2sWBsMPK{5sO+C2Ui-mtHB^aw$BGj#
z(@B6f9Jl)6K>#(W5*In37=SOCfN>Qv)g-tVec!ojOrAf4_ZUVqqvSJ%<k&bi6s1o`
zbe*(2kQC(&2u;_m&6J6*x_k5ee)5A)<z~3KpQ<1vh2j0=%5Fp&MCvcMuni&%G>WB;
zenb&XCRq%6fj#v;!U9ed;7D}5R)487$KizZ{yXzn4A&?5Md2Y4uAvRzs%ubvW`G@g
z#HSit3xt6a&pvc2>%#k$B-Ix+YID8>gom+En*i56NmaMU)JgortR*>&;vK9u^*oj7
zki<xbAjs_f&{RQp%P!^J{l@O}1O#vyY=h4?Wy>@*K);!0t01Lp^|aC82|5SE+5Q}T
zB8SZl#0n;pUcpb9%$8ms;lYT$jQO0C>J?xL4|aMnAbr@7ekcx)zbfxqrUm@a53Y;z
zNG6-r!4a^7z5}WQw!oX<N;W3RMZi`;A0)d^n}9d2)uW~F{flT`oFtuE<gsRVbPK(x
zeSJ4pKLmoK>L1x=X}Si48Bj->gq-XYCF-Eg*f%*iD<AP|9*@_hScq+-#dN^4VC=dL
z_x4A@oLy|SyqevuOx~uIwj4566-mxzBmow}!wHlGNnJ!aL_}kMT|+E&hCaWphT#HR
ziP-#FKZ5bN?4X7ET326UiN!FAMn-zXqku1~Qudq+5AC?Ajoze~y1?wD<T-Q@98+#9
zxKo0V(!5Vf`k=%MWCT!`wT9)3<6Y~BoQZ!We2=ySvJe(sLlX`}bR~rnfwQ|PHysI5
zE$&cqi*pK$VG5pkChU7Wly$4#%fH&-F2}QfCAwg@C3LP;Kk(mn8~FX#lDSadJ70s-
z^mB>L@IM(lHRafBNbcJ(O`-3U6f#Lq%e$8<aV9y6P}#ogmbX1=NX%%LbFM6Z6v|E`
zZjQcmJv)6JVKRnfogy6JwPV3d(3BMGp=`hIduB#jITkTgV7~z|tmVS<Mh^4zLFN+i
zf)GIi1B0MTHS!{=6lfQeW$g2eb`SQzSFnEwB>vE~tNW^w!8MQLwI`yvk_%hST;V$`
zp_K8dlHESCLFWn(GJ^?ZQAIT97pnYi6M0x@UNQJWhuXG4O~zw)ub`${x3z@SwHuP`
zF?cWyLd656@?)7rWKq<$D)w41Kl+d!LQRqw;jx&`O@m*JlzEu>GMLVTiyMejOtej!
zOeIU?O_)rUTy@oTqjE^38SiksW14>)DCJK=(RbNABusIKhH7@H83oCN<-a1Nga`1!
zF*B6Sx@;?&7Ny0@*()E3p6~NwLpmokzq<#n)0@tdUP<smK=u7Fq56LTY_$N-{dTAx
zeyAivDd`b%Tb&H@F6gpUcKlGbm&m}xm%+hx;0sj7HNfo=e~&$}#ugdiG3W?D@R&Sf
zQ9iLn18vJ@`=tF5LdXkBH4XQ%hIkl}jGhlecm;iJOe*ANj=PavD%dLKoI<zXHu%(4
zjidrOVIln={Gi*EEYIdO2?Adl1s-Ame401`oMDUxb~HPX7+~J141_6jkV1{L^gfH3
zd&rhLeXCQeO(!C*x~IzUuXSyL_Gr&TyHF5?b6U!TN8d_Nf@tRDZSo=r6BZhpZ({Y7
z202Yu1V`CCILZ_D|I)GMaXSDFdXAU6JcRB~FIlI409CKrs`W`;@c-KrEXD!+O&DO&
z2d@|74@f~9yyek9^)MlQ80~_bgYT$=ryQO-dH(tbzp@nwQK8EN<0S7q0QU|*kkgLw
zf`@w17h05MbE^=*hs%!n9<IKg3(TGH$0W=^z{e23%UveZX<w9gOb?;wgvnzd)||`^
zPZHrDNMTL4R-f;hr@haeM`iz)(_sCTKNkf8Z|+NC`;m`{bALht%!@`b^Ih>z@?Ee7
zbJ8CwV6759G+K6FDc0o9wA(Tk)suqENkBjVb#;<&Nm3#ywG8$oG<$JBZYocc?>Bk^
z4Rp&-kmeRHM*+_@+F(%wUTcHaRJef;5Wn}f7iW}~WpUQe1nP0GaG9+coFA2S?-CC{
zilIQHG^CJI>P?-D&4?uQpXkF&UKc}bu*WO;7f3E@oS*rwb>#8!EXB@yJq~{yH6sGf
zuzlh)5Uy>&Jh8Z4zlw4kV(@)9WA|Y!RRaGsN96znCvNZ9qUj!N#zxtJKyoD{Je#RC
zOez}q`bT}eDIr=>7_-HiPsWf&5Yh*C0HWKvF94@Ewymq*STELU0s$|qK*r(b-j&IM
ziB#j(UL7is4?j8h<2M16jDz<Bhkf`K;gOKYitBwz<<~`;(i{@)21DT42%|uFhV$E3
zSFD_EU_-G9P$kQGsq`?SSk(pOU^WO(GZ4>8_Fz4;BB1J+^Nb$4S;$vKLeHxRpUHh0
zSg_ryMhN`Cm+V!B>ydn4xl3^rgWjCQf+OH4H7i^f%(<;398zBewZLGn5bVMH6X+kQ
z9;?=hz-5a27gv3#mdlEGaDo;{uzP5?L1dv#uI3~)F5j2&uWH$$_e0gMT7qi!&*h3T
zHKh_d{(B1y&I-CEbW>sumVH^ak)4@OOT%AWKE&lgvzw|##<%*S(nOVwW+-SC#h11w
zYYgrpJZ)A5r{f(=ZJ8=pPOU1Ay?5PBd0#P&^T$P5-C=s>gm?C80~Twotmxv8RQ6|=
zR))RXC=@<b%Jksrj9C_u=@2us!+w3;R1mfCZ=+BrXXO^s4@%{omF;Fw?^Z!_-pm<*
zn`$BTWtatsi5Y1&I*u%2m`~urtVmXZtT6bNKnOZacqz9)4#T$JO1u1Q-E4AQ7<r+6
zagi|;U2k|?utOTzPn>3o@aHrqhUcxn<&(03lA&rMr$##HNpSoQ2Hpq_1>BqL10Do2
z(<Y6hddE)jCy3;Q09>lKd#swRp(^QF^5UUi9W&!vw=hmfj?usu>&sE(Gu{vNlDKa<
zz123uN|pu3nL4I(f2u?aAEfkPPt4B=pd*5LM&?T-&t7uEr9e_93*JlP!%&<62r?4D
zIbx+KQ%U4BhXVjAS0K{#!F#H2T{!#)NX_Wc3g0U1?t_Ge!wsb{DESU7sHe>Qh3(-`
zD@GY$`Z8_4sG)`_&NP?<gTkxRd}he??3I?=TVpL$+mS7E*X|on+KYrmu=l0rgPtH6
z-9RGH@vL_&G!2mDjGW@x8|I1z|E;bKqpO=Mpr+4@1&Vkv3ksSK`!fg-dh(gs2V;nh
zUS=@Y;bhnBiyx2cb=?lZH*9C}LA!tb0pKGc`yz48j=J8!TcmOeEjpoB$2m2;j#7)J
zWmD8R65_``@rBfyM}DnoEHGFr@LE`lKKrogk(SCa63I_rZAxR~+A=;xB`PrL%o+i;
zb!B{}=_K$(7bb`wZ&nIaU2}r~)rI=5G*I_4?;mHf;ES1`<4n}1g#*h>O4)A7<@|R)
z-rSG<W<>A-xFs#{37qQh9vVg`@*16s>aY5y3y$jzguM6<LjG6wm{1+M@2Xqjqik<h
z-K`$pAeChlMKSQeB)meXBbYeNymV5`Ag>&kQH%)Hk6&n2Tp$%eD2nzJOX|Or-)LV@
z+fL8)GSKV?E|C2j<eka*3g~WXsS?SRdld44G1)$LXCyQN_4FEO!GkGTH0Fs;z{3rn
zfadTa?O0M`Gpt47ZK|4%=hQDjVa!hA8+KVxbuXAF6#y-<I57Wa4ziSeUvDJwdDY+4
zk0sz(w<hvSG%+Sw3>>IMA#yz@#z@(ter$&)PWO50N|(Vj+q8jn6~@s|Wk<jh>`0G@
zbg6_Eo3X?}K}}Nv=-Y<F@0trITa`SP37SrsQtOfH3BEV;c9g#O#-B-J|38%*o9QLu
zs`lmM&_X3&SH4c5-HZ4>nPA1VrIT~uowN2$GqakKB@Ji2xw*fwGOjY>^)I&jJ(%ld
zkusc&1`Z{Y@vPNGMq%@>P8+ydB4ko-ydj+f82Nvw51O=hw(xkj@W>E8SkJEKfAF7L
zBS&|>j<9i4YqB$}wV@hI@yaO5=**neAoFS#2;jJxy$rcz;7&F}$dj=o;|{FGkpf{}
zkpD9RADDPj8UZp4Q7M;g*IRF**OO}C6FLR7kRJbJO@U!}&Z^`s|2RjyiG(mW*lp4W
zZ<(*qJ?Yb-Q+gTfTf!6QlpfzdID%tKOgyALBtUu!cA91$oTg3}7Rc<nMxW*J&V{SZ
zG0x?AUVGxC=Q&|JdJcxvm(Q20n1zMMikvZZc3b|%G~q(LHC}L{^jH}1uD4z~|7Bfk
zLwynXeRy4IP$=hM(yo}~DgN{FGDs4zBe7AxzJlnbv`{Y)3Q@6|+iCBtWVZaC`8Yqi
zVP*6`E5N>IDcPzqu^PNYvm{fU75Sr`b(5v1IGq(xjEEBYIgCL_%$6?>rF0|(c%!=w
zmYuy*kqB1fB$`8}6*Ur-JytEu!KjhN_K+PDNGg{ik@10fdQf5kf6ZBuJ403;Oirpj
z-ap(Ak`x<2qR?Cu(wk4%hAQ2aGd<+uy;|u`63;2mQ}35dVaNHb$X>lM2R$*Djr$Gi
zM({5$CDPO+A<2IYM(}lTc-I0b$H~acsY}k9vTEd87G7kyzb2C`sTCLBJWbNv{^sAX
z=dZi&R#i*(|ER3h=nwGTwt^VK{oeBkr=M7#`?6|(lnBt3AAx=8!&To2!z~wCU&F)z
z3#eo@%Sdo4|I|^Kcs}*JvFcl(DCVEwh)l&fT$V`GV2<68S|pxj8L~*CmZN=9N63Q0
z#2h|iv@K?BA`Q!%;O)732}-C%vOETrjjW`>JN&u${N?i}IJEi9eUr;(yYA4go&YKG
z2C}y9sXZz;oPYPKdQ?RUb4fWCVX^-iMoL8ynuu6p;zSQq`sS7I#UdOdE{nBxN1w=w
z#&Zr$Q0G&WgS#H9zjl+ODV5A><ea-WhsR{=pa0@IA+E!s;c_jeQuF+>|C_+Y$Y%EC
zHt#gdCa0tNRLr3-#2fq%<0Om#F_5sx;;mKgea9TTkqJQp5S(&^jvjL=uRHj2{&neU
z$_#d`i+@Z1u@!MLfRFSdig&G<{NkFN!37)~WKr*VeGdva!BIJ`!QR*mE^vF3zwO<t
zK`Fa)X8x*{uy06=1v8oXd?qx7qTLYhEKy)WUb`U#BIh+WY<)8iwopE4NUiXzh$Ba>
zae?AMGq+YzZcDpjXnLtAnOHz*n2LU}3En^A1S+kNk4u0VY^Yx8Hk6f1HaLuQjGWY*
zfanbEWlo!Q-Iv1OqG=c*{;6t5;eakWg!kzDj&K)QG?OLH^Xn}RalZ1+N~U2r5P(dZ
zmj*8m6G#+Uav)j3!B}s=;yhMAIDQh`j|=$;#fV}u1)iDhx?v`8X`t8i;V6Kb4i3j#
zHBl|HW)+w%6z)PC=X%O4ufYqB&Dk-iL7ch5&bmA;g5arq;4DWhbOFl|9Nsbu1V`S$
z){2c2vzHaBU4CQxYe;Tld)0#%ElFOHAnC0&DYR41NqTF|>P;se=*8QB&$PW)eR3z|
z-5UB_tG3<ZjIL`I#n6u|f`B;5XPF5!6F~1(*_V>JflvEf@k{hxc?n`cw(lQDC<OLx
z*DQl~-Dbm{lJMwd6KfqjXCZjAaZu2%kLAV}IK{;bkj7f;aA^0U-T=%4lw(`bVSF&>
z<)p6e#EcjIJ|o*uagVR6yBf!wysgaUpG+!>G@qG{=e&5s`Dq5=Hx^ZBE_jP0JkYHX
zr{$`H=@SiS4;@@}!OVB$=pj=hZz*fX-bX$?!=Tkw(qQ9bTbjp_f$7$wAyW7Rhrs6>
zykncLA2QsZC5R%9g_4+IQ<Uk*Fc<P})iL2;?wQ*y(hl8F36D&DTe74sDceo)IF>N_
zn^zm`<$}G%wt0lfh-HN)wPY@;(~<S}JO)f0R5}vICiVg11Uxx>=z)YTNXYqO-p5oC
z7bUysU0dByPf^Y;HB6qJ7>ScP-ba5QpODjE*R3^wM!SdF<i}><%_6C18fRk)vY=~W
zri=YyTW+y83Fme>5oIc)*p&GNv8_}L!z!1+`)4mf)Ar96cwas!uSIpDn1GU)7*UMH
zNJJDR+-kLd(N9K&@|FriqHKg;9RGQE7O?OkDU63i*ofZg#Y4Bks6?4bvcN!BV8<TV
zH%TV~a~I7t&V2XM2fJ2=<BsH;swEdI#=%v|{6RXHA{NPVpcDcI!z6rTnj}O0X_-7A
zM$9V5HQCf#Tv7d8D(n%)Yvzaj%H%*^N4w8*KjJWS%$aFotWje8$<-PJF!Ytj^c*|?
zt>)pkFO_DkDYd{)mW5aAm~pF@GB2SFoOlBIa(HsT?f(f{CyfCjVh8k0J8rm<7C=h2
zg@VjqF4kb%ortwHi<Ku*ofWBL>dvZWsM}S|V)wemH#;2eH|6!%lyAh$UcQ+y&UGVF
zwTqg?RNLTKRJQa|(xaV)fc7IVBisb#&BDx<;Aq=RiX^qh)(BmZf*^Q(&ODvk===~M
z6p_`L$9&jjoLOJ%2nG#544V$l0eT9scDY3^BkX`>2mAdA*~~4e);!Ybml3}-Q!5_4
za5>|3`$4W3?BOx#kZTF?Y+p)qP}I*J<QHBYpQV#kx4i|F3rc2BVc@J~maP+;i;qm>
z7QXx?bBG6K<;%7%)%Nz+;s|LN-I?U9X1noN8<lC5Z%&HX@Hu&^tM4A!wfpxMEnVU4
zdD8J%R17cgx6GUlbRrp!X!eZX(k=j;U*!Veg8zO1-tnWchdH1;FL24cUH}yXUC*Vy
z|Ij&GOP_$N{?}Oidu>*gL<Y3@#SG~2hy<Hz+JIzu-~+*4>V!mmrYE`}v;mC|+M^JY
z89jhW1nM2eSK9~B=a!s78z9zqrCXxboV4&VSq!O2Ja#kS=_(R)$IiFyl11gi#!jf~
z1qSntNZiP?>3CSMc<%t|;=ko}#nTv^e5U15xsiNkWOfo$doDIiDYQoK0dD3Jhv;rP
z>Dvqy`=9cEW364ugakghV+{c10TZ*(yp7o7#DpF7<&V0NAFL{GWWkHyGsjykp7FsB
z<P$eYgeV?)QC*ifW3qF60Ok{29$vj)1aXu*Oo%|%l>rIf4fIaE-;~XkJHhoWpwvsT
z`^IVmU5<GLP*q}g`dtMaiGwlWNH!zvd<Ny`?0N{?z!v%tq;}}da)nWuib9xIm_+_^
z1rx50FiDf0)Yqy*!1*-Kx=RqS!i+x8W$GF-Yw(o04m6O8#Dozj;{j|dHgkv&vX?mT
z+?J-U<`?drAxm&@Wc3v!=?lceMs!cD0_Uonr&;Du4om8Km}g>wz4A{^cvVqoN2F|l
zdwv8Nr-7(P?g!6e9McqQA4t@Ou*L16gW|usxhJY%$rF;CYlWsOxXbPcJ@87@56j0^
zX_3(3-Hf6)UC(s2_IYBl$v1%YM&=6#h`rZ?7l)C#$DJ`Wp>VDex9xmRuz8h^%KoNm
zI-oc~su5JFlv8JNWnM4cSnNrmyY}oi)@GtknYCD>H(?-%*z#MFmy@aWc_GS$-Z<u@
z+VovbWA@XQ`1bGHyGI+LCUZ_sHU3?<!A#`t#NylXzER07=rV4>V6>!Ct}F9-R5;K6
zc|fk~u74`GKt$KIElt?JEm_QhL~{fHrO&j?jLtdNy%HSHa=I-p$q9aSQ#h7Ne*9qm
zGBxYK70)id3wl(6rMdR2NHDqc$7kR5x)+YIB5k$9xQG;IgD8st(L04D@RwuXc7F-$
zx8a!0v(d}<7_+BM%v31*XMe%xZQl*Ddjz(rWoEkTmceVBrd4T2lzNBh&J@+#u~mpd
z9-168PjhY12`9vbagWP~vi*Knld760NO-$tAI&7Rce$hCX>$`%ksvjkWU%+tOG0dk
z@t#CE7l9qT+;RJ9QVr9G$4vPF8Tw~O>f*j!_9ba3RS1xm3ywf=Kh#TexU6js(UVJN
zv+$K$2sEM6;#r3k$pMQQec~YJlbd_+jIDi~EP~3KAGk9f%)(FH83A_p`!f>N&E2K2
zz^1&3%cteZcmvT$RH2HG6=5|ln!Llu<$7rVUWiPM;jnNIRDAl&;;+hv^0bl)Vw_va
z&lu_?h|u|1wSz>i8Cke@DoRD~%idijK1pV|sdysn8*Hk_%0%@kvDNTUTgWIY%_`{!
z;#d?v$!=J5F+%F29s^XiL+}xN7xrp|Y)YjJvM{&cN=cYW#g6WaJ*UylD1!{M0vyfV
zbn=<H{xZ|&eVWXCN5GOFjw}wK`LDB*{SK&?NDJ}Ty7{hyp>RLXa{;Wz=ffsc?Vs^b
z)N&+j$rM_K?Ewx2bNmA7xCJbTk#KPJwe=+>r_}x^FtSt^SlQwK(n0U3%yL9z1{^Uw
zAF5%1yx=Fc-!v;I_|xz@6v)6MHKL$QsM<A*ho(G-aN+ob0R1)(s)}JkBxV!x51uDH
zGAF}L0A3gS-}V8<i4>(Yg(@Zh88?{9;~6#Q^nLsD7;<%?NA<!lSj+^NcstTN<K3s+
z@;sL8?|QBp<X#KhIw_jX{fWr;4fi^62sPkn4LU0M7DB<h0~Zs1+Nak!c+MjFjrxJi
zoZmYg05K38VsEHq|Mhhi{Qi5wBj98ab(SqaHf9-Ogr3X*1)E#R^93+qa8Tb7u(<Zi
zLDW0BrEnIoO8CDkKP2*9wTXR!KIsQ9eRc|Y#+gs&&~E2Y9R2Q2JvBNR%@f5Z0gME^
zVL$e@=fJH8Dx=7T5=w9Os(h%IqLpbOYYJz~6;N9yM;HT%HMO$w&vP;yYFn-!2HxdQ
zW<gXB<x@#-ley#*sqZK8PuGgG{MT2L5sanUkf-bKeZ^)owtBV>bg3Hy9%kA`W=@v#
zBlBPDt|{*~r0nIy-*8A+$ZX`h0A~1_;|mQWS16E;bEIKbi77cmZ#svsS0K1hd-oD8
zeYfYhQ@XfPJQ;LRZEk?c%MCKq7<<(t%eFmsBH?413B7_r2gRds_;My|AQu0VU)!nW
zo%DivP32Pbb_mJT^)p`}0;+#EKX}$j;I-g*pdJhN)$<~BD)6)deABbtL~>bGb(T^G
zB~~+2h^Bq;5WLw4YdM*2+qELThvd*aTwnK?^mo%lznbwq>xQls*?H!Zme=FDrfEqT
z?iX{jV!@yMOSjsxAAcwxO1<lvn#{5Gl8+%yLpKCnJk_ly+Ry?`%Z``zOV7@H_XwcP
zb?NPUebG{5Ycwm_A&toMEsj6``7MaBxjTxH#h%t!{Q0c+!W{W0aH*QgXY}S~zAO`M
zV+awM-(3&Pe>H0Mw$+ws{4|-RSkf?pehLIJ<PDZUcE;CR2@4Tv<*gN0a3y9h<b{WP
zfH5|d^^`f+KL}xjjQiQa%i!xp=4pmzCg!!unAiM>AFA8is#o)Op?Ma@K-w)r!Ycnp
zA;w%t;GtM}WMp09_-|MTX1yDlEI4M7W%oqwKFO0LQ53ny$6LU(vXyWQs{Tp2X@_vt
z7Lqavxnh9#cH~{67PFSP8vt!ODq$ExmU6&WF$YW)bkIz)BjSsGESchy_-_{aX+@N1
zOc|oKjcjrP{PV+2_k<18Bzlji$U(?tFN2{DY$Xs}`RahqGV&1n*TF~c^{})LqALx?
zG1q+sN~^W#XeHEQ7a&1pQ5_TlyqIW}L3P3|M2U@Q7Th6@bDUuhlm*cH$;a>gOY3hQ
zUV>}_aQLWud-#MYPkuHjFv*2-55znio><3cYZ?jF(2Z?xOR$pEt&)_|ah%|s6q4Su
z@kGAUQu9eMB9fQj0^x&ezd@>!Xr3jB`^>N2HRW)}KWf?D;bA$IOSHOuRkl%iGwa#T
z%=$+NPFli@QgXy#z2g7SnnPD*<1Vi_%!=5?jYYs|qv}3~eIpg2QW@vq`%Ywog2lGE
zeXPubpA&yNvaO}VhG(sT`)?-~lkitWuCIwvu%=eY4Nt?EkEq-)LSEK0kKcLOL&Zd1
z4yQFL;=v_U36U-<j$nsVrj$w7w}d$fLanp1-qf$W@{yLatwcjI)}*i<k}jG2&XOTr
zQkPA{FeVm0S8JfqAgLilfc|W~gP*@2uiNtpU!0z3Bj1znDhu6jnb$kI^EQ5KgR6Vy
z%(XvvmN=^ycAg!W#oY?q6wGG*W9C=)Wg{Z^_lIgVV4P5xTAq4@(>!<->Qv@YDJo+c
zsgS=Gqo@#H%lm~8p|*9<56Xn6aV{HKUF@B;2Aq%TaVwM5P~$Cc{x^~urVReb7s<IQ
z<fJFB2*ZzC`#G<+ss&zXL8(xf;i02+WBLy9JX82VrMUo2Bcw8=rR<E!FQY2o8IJn#
zSs4(}jrI_(Lph{^MvVk<orE(p6o}%8=Ow!KkHz><^?$%PNazvb2`2do<x+!ZEs2>v
zWIlgc!vL11d$PJmc0I$vPoE%!<^;u>g1Ias<|cToKQE!3R~n30r-foz1HLd4Bl~UY
zMcI^?qu~1ra{^p@R}|QQm+glsu;<d#gQrVu+C?d5_{k0!P>Q-5$<xaV=KV;pMhKB>
zQtXE?5K`Xf(9jZ|)2WtGT`$y-a;ClAiyZ7)fxZaoblHQ6mvt)?;+@Zr{Q1Jqdb{?n
zE+5OAn|nDTrRvMuy6yP9>CXN<aLD$PAfBO#%p8U9{pcI!)<2oODdv?_m981`<~q%z
zFqsOw>aAid(xXW%9CwN>@PnisV{TNK%}Msve=%^EFa=URu}kt%u5)wTmvT^poF>#S
zsZ*s>8FA#4QejxbV!K3S0=x?|6AG~avSq1LB-i+cxx14jY2j{93E)6Jo=6D%s1)C2
z1t4i`UejBm*atkIH3tj=2NV*PKr*h=rDAko@U`yP*`>Bq&ZmJi8o2QDeY?=1(@_w(
zNZ<177td;_k1RmW1yh2M!ZK7~^mnY<^}EifY@%9x<JC1Y1Qx7<$M;o(ZG}cUg3L?h
ztSI`^su}LW4>y}DfR^*=4|YP?dHCZ@OjKSjnSH4KlCtYY5B0$ui$>+?Pp)t^4r(_p
z5WfFVwfE&6c-x{5sl)@bkO@Y=;FEuq^dh8YnJMiZ92r6l-9Y3D0Y$*R$)8=X>$ak0
zB;>9Fi)>0+N*%i&G{K_ct|&`fW)X9&mXOz%PgrL@QTY^Y$Yqgv9%6FCn9_-w<rdS|
zAOEAJslI)_Ei#h@BYenIKBle63?KEuA#AY?weUPH#-%Wja<ZL`8I%5&W08%+!+v6q
zoa6Gh3J)?d+*sC<Ar^v>cJqCpgP~FFaS7}xyh}-L@TXEt-vHB&^DfEvLKtYIEIgmL
z7Sfr(Y2xj5)lvs77I3~y1qFxfa2gD)QM!8(kBP!H9{X}rlYZLM<r;?7ViSu#bF%R*
z3};A8$c~0dQ^lUI(zM+Zvf=)~fF!*)FP0k`a<$C8*BCej7r|L~d&_)hdi3&r)nn%_
z1ehHEI+#5W7}BAq7hkb4Kt4;*Q)qb*Cv}4ww919y_iI#Y)(?rC@9q!*KR+^2kxpL-
zj~tAk!luzz{SwhEkLR|@o)s!}BR?cIr*A3vjqUdlH1!6+(sKgXJ@$o{8?xzfs6!(O
z{paohpXq&)NNU(ZRIr-v4y^Y(EN4qjp-<b&HHTNsTW~5tG(bw`nJ38O5#`&iSq+@G
z`y;caet-S*Mh!X?#YI<<&2R#UseZYUg1jHogplJJ2qNe6Utnw2(m{b15IcG9G>VxP
zMOMx)JEUo?AW5g~JG{rq*y9qaUVm9Gdl*~E9NtP6VX#fWnKVez-Q-INms$V>rV-tj
zrDW!}@R16|ou+Z&0n?n7%;Cn{!aX^B=mvD18rCW5XqIO#KRSKz!hxkf<H37HgR80<
z_E(p8X2GjSp4R|x9of3rC$}m}xPrnt@>c%WXj}bw;$&yH*(K(dW!SuTK5puY4{^8H
zDn-*rm?nuQ<qoiBd{xQAqAdI+4h(2ibRmJ6)7|xX?k%e^cXMdUmBDLqG;WaO^^01p
zZ#$U^u1F8Fdnywid4=Y5NhxK7@pYRY3V((g7TcAzH>eUIMKZVVu1Io1_OGcQRIi!Q
zx6QM2#%wL&fNPH=HYgBc32|aNvRhwQ;ATFm(xAN<0FQKL9~`QI|Cz`N%KzSUupELT
z+r?5GcG@{K9V7A_aLops%V`n9$pM`HH$B+-nsqy@R3|*l6)+NXDuPUNlbDHLUCzHK
zpOh)?LAmAD<U@t*b`hut8fWNbNP`BxvKb#9YN~32_YKg11@G#S9j6C^`k<aad+wAM
zZHlB&3RALQ)B@i2Ndhu1$41QA6zbrI<J(iCIpc?(4rjFzLjM{GXR&PBy(o<qN#SVa
z7zH^8gV5hqoOjQ~^cbsrtJ@m=fUi|gufy!`TgM#VAhUxU*e0~IJ6ALQjkQA<09-vv
zMQ!E|WTuzbj*2+-Sb!rWWd%dF0PdW>l+d;tZ)sY>0d(_mCs7C~a7TGKrR#{w91Nkz
zM}mYTvWRXqh9|z24yKpc0x%oQFQAm#o3d|9yD?3L*v!y)uCJJN&9|Z^Nl>_A23X6{
z_leGiJ};vv&xH;-1m0v(8#5;aK~N;oVc_n}NNe(LP?dKfnL2^NrcQkGn@bSXh!|lh
z;A0L@B>CmVmcz1abZu|Eq3T0o#FSr}&Afi8MOF+S35r^qjo`$9qz9B}p2~4m0diek
zJmgG}UVtF}Fq@6Ew-JV^bNPhzz8dc|>JWCyHq{g{0p2|!!jfC>z^@xg*syQ9R?Nk+
zcD=Z{U)M{S7&o(8Ixd{$nZvw5M<j7k)$;C1@xe<KM;xwlx#a893Z#xaoklU$Y$O+8
zA|G_TfztqZs`bjvCwU1u&pO-;8{M{m)R;4#aqm5g-0Dpk2p84if5oTY<&>UOFbKWl
z=@prx#8Hr{YI(lhHg&++#LPBTIbjF=AIc^s+7n-C$f~DIjtj|XT4a02B`r3`hSB#i
z8btOzbBPSt8mm=DAh`Bm3<}5eK?C(mc!{+a-#ZM&6?h6}ST4Ip^V($cA|(Zj$gJ=m
z^Xg->30OFTcb7zEi>BOebTa&`Za5*(*7x{HI*VpzjZta|gMgerbW^Qm>%9c?nhoa)
zAv23ZyZA}ATt2B7%t)DB?WTKKAW5P$nmJ}m#1#wX18+Sr;IneOtlV2E(hreatk~Vj
z3h-~$aW5A7M#E1Mxp07xYZg6Oj0KR`2(2mxQ(DlE*r9Ms?Hu+D>EHyJlE|32p3~w<
zm0m0qHZw$|5kPnduinaJSv*hGW<OPx2@`cEgGJKC|J81ZFCF`D6w?=iT|I`5z{jj|
z2X|@^Le@$J@?KV;zVzMr!2X4ZcG%ft+dTw_kScTYAxV-awviBa3<PZ~?Rv(XUk;);
zw|KW?%6cC2s(x<Wjb>b5k2Dj7;&q$S@(D=ck2&Egw%?d2eK`UsjU#FICFYH2*Q1hT
z+|l$8LwR%0BBzZan*1{M4-#@oi%kyuxSZr0A)|}wscKK&jAFI$!7Dx3JL&pva}TuY
zZSbvQiWdUi|1)3p<?DV$CsdU&-4g69jvZ4Cljo48$WY^|EcdN!Zu;(4eIwpGK^1Le
zpVa4*FMca~f)BAZtw44hA2KH?$($fxF|?bBM`IYVwTk){JSL$xLA80iO8r3YM6V%J
z@@WaNzXVKmraP>_e(q~HT=PGFlZ~nboI+BI>Hh&p<@B!6^j*F)eaD1ZaCO8!60&dV
zWMp#=2`MZEbjS|$vO=7qIbn*fp*c)Q^8ED?Dv{T~i-C!Px^c$P{tZu1U0-WgfyKim
zRkK(V;~Q4Qf-Tz6D6v(#)^vXQqg3_q<Be<&W15&GYsD>&<5|kbwW`k%&l7A#UJrMD
z_h@7$J@KSkV^E6#OlZSw=~5&Gal-Q@GgIEi6#D`6(R=n2@S>4qz=t+=_9(-~Q=Mah
zUqV}hqMwF+QaVTIb;dVJe_JZTdN^oABqv@UvVeviy%QMg^`);K%VA}vtudz`|KhTP
zzF0tx+nYBl+@D9-10L(VT_(oy*+e}@Q1Wt>y|s#(;)GqkBkl6derugjt%DWWr>)pW
znvU8j|KM3KA;v*S>5AbKI8WH0SlPATeWClRTq-TfhfwHV_nhX};xsEfgcLF|r&s2b
zg^Ewu$945yFH|XBA@psa*diQBr{F^JP1#&QH(e`KH!S3}rfliYA>DH*afMDW;SO<<
zGCh;KaDKK^27&0jNQ`h$j?@IGj3ZjkhawRWy=96Shk4PduR-8H9dJU^od$Zw@(w0L
zq1NJ>p2_$Riv*MoLG(R>_jvI_FdV$L5>=Ykz{%2!#5U$B53jEk!bh<2Xy=OB$t=S`
z(dMA510|%}5V$1YUpo+rME01UzZ|wY4y|5!9C8WtnJ_;N-~w>u%r&s+J`4a%ws}mV
zoD}zcD@5~fa*c?tG4Ot299IX$6r{K{E^D@PDKU9%*HQrH(h>AW=7P069Y8%5Nl(Kf
zeVBYv=(t0M1cKq}9A;2KO%c1K4&7DQwta5OabRBs*aQbpXpO7HVwG2KgLk>C>+`p{
zta_ZgC##vzNG_J?OW<ly9-R0Ck-Jl!B6>xMT!4W=e}0Z3R~Ruv4r|W|djV&a_)?Zl
z-EOM;WjW}kXk)gMevEppJXsF@$N8eb<<8!I1CxbEqo5~4yFA0=9h32>$2?gHj4Y(j
zh@Zw~d9Mm&BlBi|1fS|=aH?g}&kKqWuucU%j71Eyt;!ziRH7rsq(zb|tp^=RIC;<{
zjb@N$or$8Yz_293PE@v`;6m>k6~*zMo<07JN=RZ3OQeYn&dYwV$jKxzT9Ks)-AYY!
zC|=`mRq@Q_C;4So#K9725-iT!<pMs~bRLKE2*f9L-p|f{v(tauH7xM@dT1;qcp8Ge
z^Qyc@`u*1V>QgKE^X#6LnrmpEDFrAEpk%jzrWmTV^OfR!KIa9487W`#<Gt7d-?uWK
z;;4O9M@@qDjX=+H4hp<JGt;&z2OplcGY(b*z?hXN#=dV@5L*XVqmjp)xTl4APen=-
zAS9Zod(-uZ9KKS^iIooT*A%&u8Y}Go3+o0^0^hr!M_L;jDahH0Y(i9Qi7MtyiX7CZ
z9<Q;8N$nD%RFXxK0&T<0d^mPqp4gQ+ZpsIqWt33b00y1*OPPH*s{1u3M%D|<LiOh-
z7_(ug_M{sD5Z>870oP`8@Ne4X(Dme)8YxP?f&el}a2e)lK0+6>3o$<q(h%ooo8#2t
zJjBK%1o3i<BQJpv{>KNjDavhUj#eqb!ps%;R3n$WVcSFimt<j>qHdgJyuCnMw*&eH
z+`0%QvZ#S^5(`*R$6~QJ!FZ)OLJY1%2(xOF$nJ9&dWz8Ce0t?B#WQ!;*Jbb?%AtU{
zs+Y8okpG?QAfy1B93`SgBMW|QnaH<H&lV6WrMDW>_$qkABg+z>2$)E>VPEm#etW+u
zOMTlX4;@+fJ&Sv|J5VaN38N@8({DM)<qPvAzn074y6=>&yr^5?SgUx3d}2{C^hk!0
zKa72y|BauIokw;!;2aLlY`N&~S%>!_v47+yTi;!{NZIe74^?YrZZ+Pmz1QEM`@2UP
zR*E~7$!fT{34W?MFO(=>(Q|QC`>;jxKnz$|WS$RiCZ=4ugSjuz(}ou{_{cx{2Kbu$
z?yfg|fM4FWP)u<x^@{-+IqVwIsM2(TUCwRwM#UQ)kwW?%3xvB{0cc?debsrki}@~S
z*=Bn4c}`+%?r?;NM>0<5`UA6SvMCGv<XdV)BPkvxyhKbs6@J)kmdq0}`co%`3orZ@
zyi4{M9Dc_Ct>ujU`jYGu>%1|q#Qs$^OvBxE?(7e?!aVZn7V#-QbkHYE_iLyn|1k55
zO2-*+T#azjYZ)Gz5z@EPfPsaySUxkYC!qI{9mr(nea^sYL77;gDS7&y4_$%zN%|Pg
zpSzZ`RODF-k3Ms=zki1@;q_m7qjoK$9N>wSd@uDDV~5Yf4cWJ7=PJ*C89CJw!Cmi|
zm(-6-Bj5OUy^H+(_!>Z?33)H^nG8Aw47alBH;dAE$DxR<HMx3pX8j~un`f!5%|$fy
zc~~JFcHL6@YvMcX1qb6TVRXw{y8sCOEcqkZK_+q@PrkLa(-W^fumk_`zHDka<_(=H
z=KJiu<{T<UzD%)s<Z~XSYfwH<0({<Vkmc>OvH-AxETs2!NA@#PvSL%#!K5TLC8c&g
zi>+dyt9(MmFv(3xFd2XgH$6P@V1P`F(*0CJ_`PDl&2FlicF8O3Nj^`X^>X;RiGyKw
zIjge=Msnsek{|nW)%Dw@PLTi=ri`YzqJ|H??Z#_enP|JwbZ#XcAfHWSd3X^1pKfgL
zfO1q6J@6fl%A1=`aU9>?8Z%?nejs(3wI2scz=}}We}qe6HE_tu)98H{9FIJk^uBjG
z-;uT8V64dbkiZ4U3R_9Ej#Z0DCZFfv#ZS{PUtL8YkQ_F$oFUXWI0F|iQqT8e(p^G7
z(9gx6!Gx$vR;T`{RXCA|n(1m-iN9eDOkc?vvghTdzAb})jqHu=R((D!`j^&P5;j(j
zcy&52Wb)zg&olrGmzic1O9=Xq#fo{_Rf+j$6$P?YHWH=7j0ZXJ3b>KS$&QQDtm+$K
zFkz#;UT`||c)<tqHG8Sb&M>{nv~BIVy?J7*1#V^3gWzdW!CqO{25eDHm&xamP^~6K
z1OE<=dem~UZ9~rpMr6EWp?+<J-Wa-QBo<^Km+%@D+eJaSAX{-gkv)^BaCG>XR$f|U
zl9HH(J##ltwj)t>R6*yMnS`^t_as6&iIX|hM<=8!fy_=A;b+Udt<|RMJqM`>xdpO9
z129125=rl+<HL+ZMS=v#i)kkdb?ti=KL?A)%H5XB%OFkQ#FCUVuJeTjMvwK%)ud__
zrqD?ieNnC5{z7(}V(TE&>m)@$b2hsT-RM?iVnoEI`%LX;&he7ELsbmVtLl}VLX2`q
zAZGb~O58}UD$ct04x;iq@7J&URmCAL;=0Om3V=3@dX@07k|>^YAIth?M?K^%1~vZ0
z-pcRn@AVhsVWU0mO6oq$M`2iJMHHLvW_8~+x_D!+(n0cJxvgXz|8A_>&CR_(l#kOF
zhVn<<*wFKExFQjeq(sYw>5eTRD_0gDv0GKE3Mo<W@{K)^0@0fG{I|qP9qb<3+~-TG
z6KBYvg}t$18*KEsfyDS19+1ys^$org5)*bUxScVn3f}j4Wc}YqO_4Peq#XB-BIlgw
z=A|MfP)}P7tK09FS9q!%)gHV)DQkB;WR~X0jgjvg*u^h>b<eNT8wmfFH5~tM;1@{k
z(U;3c!-J2I>0uL`RF-X$c7)xIh=--IjdBM{^e2Jv;7<E-2Q)rRt;lzL$GpKIcv~V|
zO6!^)gEx_QXBy@dE|HsfCpo+#@`WcC?B+Mjo;)Zm;Mvr^RJsN!&)v>vSqe#Wo<K@1
zHGq56kBx5nbR=W(h;w36qZ4ueEeql(;;&$hg`E`L>A4wb)9liq{63Hb&aAY-&YTOE
zezR&S=FeJ_Iqa}V7TxkRgEF3N?x9}J=OVCAkKiFO=fHFWMp#UpxQ`yMT_fldW>dH?
z6weaP(8&H2ka5=Oy6XNC^Acs|L9f+26r{>-u^>Li4?Pnj8eev%ea4e0o2swYb$Rdl
zOnDZAdyrc?%#klLM0|Dq%|>P?sg`uEtTLJG{kTJIyOV6GMN{MLn0}Yz@&=uLu~55o
zn!(ZbXCcJ$v;91eJ;@MDNMC>VbpWB1^ygA=TkpEyU4K`)9s(0GkDevud|Z(vGWq>%
z?)m+z<Gy3DU#a$`Q4iBSYBxgWb?`ZoUC*I@e_3s4{*(xq<+}v5!a?_rh>g@;o$-3E
zam*YUST@-hohMX8w!_-GH04sZt^@#~wUg=;@(=g_%=Q6|=&BU;n7vHOIZUpC4}UiI
zCDGgzm=Z-?7oLN`qP6M1yj*tURPPq`9n`<{Tg!6>nmYDr>XY(r0BDua*lz*KK&oBl
zDwxx?dr4BYM9LGCwxM)}J_4YQ&-s=)JSJE-zEwER0(*yOpNs;;<wcxC#tD&tIHqHu
zQ9+E8+@)spavv%rc%~Xc-idscaQD<vnwBQ>lqrv4Zn`9A<LNhdXj3R)NSSM#7vL7P
zz|v0GFU7=kxkLp6*+nT&P-xtA?uSW}7>cVdYK2qDaaruTY*df1*_^Mg4&0n52@`&h
z5@I6B1@$DHm|rB?WZ5!g1o-cUjT~dL)>nT`xcDV~4Zgk&lGB&{zVF)Zfy60?4?r%}
z(5!y0?q=xCzW_`Sc&#QRo>XfVebYLDEPFmvVplcyf}`gI6W|?lYoP(JT~_3Kn}97{
zH7J~_ff}eSKTvlsegAi3Kiqfh{=FNA*N(jdZ<04L(qy19D!~V2JnqP_L`U<>b6P|Z
zS0J%{-OlSZ;^Xv<TR@-z3st<ELoTBt?#t@?s1F*6wt1a4eSWk}r7oCF_T4bJ#2&Ho
zkacY%Rj6RbvjAqYZWJZ{%Qa>7pvg{lyY~AIgJPn*PUapI(?-?)WDytLah4+Q&@LGQ
z6d?lV4Q0%|Ss|P}>JE9zD`Wm+!Ewm3=|6hJNp)qPs3KGzJ$t--ZoN1QhV++FV|0Py
zQU~e@J@3i1`jB%UFf-~-96(${=u7V)U90x*q@4GUl$W~DQsVgwq@35|vU;pZMr%%<
zCwi8oR;F!?k+i0c`B#AE%L|E=#s$yPxY>|=)hRdu=he{Qu*3>`h_$XUSo*}g-fVv^
zm5tfE<~_OQYBIh&?{4m){}%sAFPHpX%NbM+yv;XweZkv(p&P4Y2i8c{fKrNob!GfW
zf9Yhr5p8BF>`R&B=Q<Y`Ro^4)>o*CXI|fWsWq8nL->RaiMOx4_$H*xXPAbVLm@S_;
zidFP3p2-dC>uXi7JugR?0b-3D#Hb`flaS=Gk!Vs>`5uIOKq)}PYw@*Y1>mgQV9G#W
z^mKBigBL6_HsiW`)bZjl5u$~@P)}UFKbTv52XPgbc~ag>|B)HHCH2R2duKkM+3C}{
ztbd)THieHIZ^Ry&@Qn!(MFt`aYtAtiiccD<5_ixw`RPPIS|jGv;zxW}^~*B&chlD^
zUmCB-<j7EH5F{`Dsq6a&*y$$oXD6j90v<unK<CwHVtjKX2w6QU4YxxZA2srf{mo6)
zG$p&J<=vgxm#<9HEXHAd`Tks+xJ<?yhnxEb*)DhZzhY_K9p$({bF!2j+9W}g1xL?b
z4|lD-MgV|hY2vf06>$2Lxy%%X8SW>E<o7~Iq;N{jctmh$yg#&8QFRotj#jOr$<-?}
zf;rH;qz;x?IR=KKZFv~&!FI;89g9xAhyq;xr`vR1`x|-f?+&+-Nd<~)DfkM|@Y*Ad
ziGvU(FBbh+OX3k3TT-Vq=B`T`q_Y`Vl{nouCz0VuOK6gQs&9y1yMzIhCEU07o|R4w
z*F;VQKK}DC4dNurXHz~Bm4Qmw1_4L^&;#2&h=sXM;(9GFT4vbk(u;gfGRrgQ-m~qB
zqLR|9992u6Lr|8ItTLF(h{>?0pqp=2o;;FgU6~|#*3!!=xTv6=sjL)x>pPsEHm;rS
zPGo=<Rg5eH_dP1qrPG~!>xcRV!q_j9YYgrpK*_c)Ex&1ctz^eqW1~8$zT8*cvLkan
zl(J8=0-C8P(LZlLJF1$ydR&|2Ja`zEz);W(bgS5{>{{YH<wW|7N+)=gld2?5F2G&U
zGf(nsWlSH@UE<7-&#>_{bGpQ_n4`b$w;HHhS={(>@yb|A*1!aH#XA;aZU^vLo959P
zVa}{;Zgltm^75ftp1h^m5E$pX7dm*!OZJ0aW)DV&^De8+ret9fi=8Li@PF?O8=Y~#
zs+Qk7ujbEup`lcpJZmHZdu9o!_j5qPxhV(~%3wdzy#$+$R|4-Yp0+<-*YY9K12vj3
z?P8Ew5mTYZI(UZCW9$SV2d)ff4qI*taZ{w~+AzCRxvrg4p^`pU63%&nfmEnOw((=#
ztjil-2!B?+>V$c`a(DZc$LWU;<m=RsJ5aAlU58HAkogs;%gm<hGAFg+Wqd6g17;%g
z82Fj<9naC-^OU#;9;BcevwRv~bi(Y3EPdMEZ9;QJZgpr^=J=%R^`vOQYGHA8$nVOt
z(_uO@3p-+fioB|y@0x13rlp6I?rF&xk4<&xiv*w)og<4yN^}CZIsa~f29FO@QGww}
zx7DnlKG7<Ln9mB{()0Yi>B|Q`U{%yHtTKj$BImi9naAr9d0XG{?#mxl+vy|vgR~1J
z<ZPuzP6>}j7O4m<_*9p7z!zg<F-}HBYiC-3Tq;Sn<%ylY(^r4|-&n0B;hD|7C%cOk
zp>~nQ28)HjR!??GpKP@~2mAx}X|ottFOf)3qzDU6^*E;+D<;PyyXLX<M3mIQ5-Z{x
z)kYY67?(9pClU>B&aguN($dd!S7-k{Z|~C;J=q`hy@V!kx4sQsQ$Cp8m+lq}-c<9{
zDZ$twn(w@BN1s2$o<8*<FM-&ER17~<%qeFs;n?16@FN}mSFJ?g^=5Nqn(0-5%#{W3
zR)75ME$=1>u7D13A@dtpoE3?+`r5IXab9>pYX{D6Z7*4JFXh6RnTm7@#p?*9W7MNU
z!tyvy(XX6|2wS;FnG!JlQ0hqrfp{TVRg06a&{jK{hpy#+^&ZAbA*Ic~7ZNxl@)1&#
zeXaYux`iV7<{sS&jj#W?8un<h({(Mb@mpX81dvO0UtYVPm1oBNpOwW-mE@oik(f<K
zxKC+H^BC?;l##G6niWzUK1ee`Yp9ABQ0TU8LO#RJn)?<S2Egn;tL7acv27>m6VAu_
zuD0LbsT<CWx}l8Y-fzlgI|ajdTMsuXZF$PBFmlbZXsa>jTSf$zkmMd*g50MD^t(zl
zo^@?!G<F=Q6EVyDW&(w5Wrf(>YWnNwIlra_eD&j50&NT=J4k=5y&~H@NfpT4QC^a?
z0aMdZO(NBHo7Gcgagt5C6Y@xOoh-{UAOFr4;w{TdlHpx-`L}o$`6mO%ux@qVKO#zp
ztwDeaF;^_<&+{A(UXt!7rn0{30J<jte|!Rfj=)agAK6tPm^JzkSKT##2FURSudl{y
z;{oRX;QIZ(?XVtb6&c~&MHksvFYlg8giT7yT^rY^r3<f^lW5Wl_K&`knxtEm4K?c#
zG$s8riw^2t_tarRrdfP*qhV(D&pgLp{`f{~g3Ky~?{wiqF#xcU4y+o&p{VLhm<9TO
zKU35kBJGO~-@%}#JEf2<hANebV38en!qTXqPwsi*cO~1gUb2dL6=F8aU{r9FI!P}0
z{XJw~bifMPcy}5s=0;K{q|j3824>#KVKU75{6PsjC9Ei&J4xQOkii3!0oaK*HZnMP
z&HmP`1SxkllXgPGGhPHegJp#|(NNmMyV9H!E;k{`^1=jdNbL)--3Ez!<w`bLJzuM9
zmV@AGmp$}2%ofw|@(S}&S+!At7mW`|4B%EsfoVFJ<A8@v3+8rrg0o!T8#g;jAHsM5
z8c2$s1<!g1zp>UQ*jN$|MBe)o3%s%kkn;F9){gQCI%+$`=ny{8>ZtFYT8l4v`W=5^
zPurWaQL}U|(0ztR1{P`=!!c)mjS=R%Az`>g2i%_e<eb4sh$VB*Z)gwQ9GhN&20D%a
zI-llN2wXQCb~1ZH#5<J?a8?vES<b7jHJBd%!6oIL@G#^5Bg=}+4bsjZ0t*z?I^jRI
zq9nRFTyIS^_YW*c44X~anf}A;xnB_*u=2lsNn@7Qp5(dsvTZ5@@slQh<LW?7DRMY`
znuA$?%oL<BO+EN5l~`SjEE3(??&Z6_gD%QVxup0a@fX^UDRa9sY?7MoCl<DaId9c~
zy{)1jQ<Ir1ILW(Fkuh7?a%l;RJj>n0rE~@qi-A!Q{|sa8Tu!DUOR&5*v-+&Q!OWyP
zs2I8Nma_t<oD$?MdM`wQnG+}EHc{ey_H5<^&zhXT<*4qB!ND`SqK}&K*B3<4(7SqB
z(WpX!Zi-OADovmIV%w8$Y0oNWWl3f~cJ*L3-4Q@72VKrQAN&k%#8UIGSx{10dpZ2q
zp9!;fp-AgkkXkKE_Z`1)?;e3s@N-kv?0@Jv7iIN4O8~7A&G+ZIfo1ZEW8raME;-#s
zIRsZ#jdY1VndwfMs}<7#bt~`tWySs=xU8X(r>18C#Vt5pr|@lxxy#=n9AHcCU<t;d
z20%-^L9URKt}%fIar&I~OfH49a{VCk{pa*t>1NBUfOF;6$MMIlBwS?<&(M*OI!*{8
zE^+dh(QHP9C_2nz<V42te$+>dWnu~^n5FwsI7_@wqXZ!>H**8W(V(_d6qlmFYPQ8p
z$k{R_yB_XJqeoQ2xIvcNVw2DHCzZkm<Mm*)4dPMHgR>57uthHsO~0~$Kd7heS8b&y
zY;bS)^1iFC=@k@uTgBh=DfVzCZ>?%9f;f7Qw;!L^5YZ6kK?Ugs@QxAj@*1BOxgp!>
z*phh~rHJN}Q;cneFboo%aMnRqfL(%j!6)hn%9G~M`qJA~5xn6UV1LWtHM4;RszTYf
zFex%C(o-a;C~T>wjw2mcaM&9iI;z}UU^^kLbdVc}lH~oXpPBGGrhCgQWWV|8YpLum
zbe-l-*Qt7xdbO&_sJ8Rc95^Raq^5?M#jcrCk5{);;J2z>HXcsZ#3z>*P+YnAq|Emc
zRVH&bo40Mf$WqMe&2tjr8N0~*hn0p4zrSn0mp4?A>$<874P*w52x5ANQjx+n2Qr^W
zYm71>wBS{I({2-a>f_q@33*m$ah^_AWY6mUkxt)(jT-ylls?NF4!FiW=OgT5hvCf?
zhvia9haK#(PjXiVr*Mpy|6^~vaf+kQS_57hc{~(2Q5Iv>S`L;13Eg2f+J5TW!>-w=
zW-9Kq*=e_#l;h=F)-T_+tCH;HsXdTUjWagyM>2gsSe#JmM{`qmpoiQxA@w{@rB10|
zFkjvWXpq_M{Qw?l9?g8jQSy1l^|hkj=CyF?EPbkPI@b$Rs2G@55P7B*Fk8=j3HiWV
zr8I?Yar2BtXc4Cy5BQ!>6$lph<&)%+`VSg~TY`fKr9qhaN~ls)Q>&!se;R+vGu{CQ
z1Cn-j9PYIH46MrM^`qI{E@v}YUfG0h^U6j4FIaT9rgF=?f}?4{j|5izaVpCj`kIl%
z_xn(?kN;RoEos1y3Hm<`twgb@Lq3$P)xi9@=?Iqv@vRbv0+SLNe*i`kf;C~`jf<yq
zGHuxuWuqcZmk<e<az$L)P(PP|33~lUDh8R>EjcOe056^=2iU8Y*qT#s^z`(o(0*bn
zYQv5s*zl@fZ<W?Mv`|UX)ku@btd(#yj+7y$hE(kzM}4`0{%7SU!stmSMF2q*YMLk_
zGei>l`H*f@h12D#yLKo?WcZ0$;S^OoJ(=vGjR%Bt6<n6h`hR0BGZ@@GJFiUJVGv!1
zrU5q1krFIw8wTEp@UCQTH-D7n6H)}q=R-$(Llfn#@UA5KF=GheyrM8IV3CkV`sH{o
z-RoUgf?#Hf=ROx{K6Og@wztMC0Fjjzcy4{_RS!skGkBx@NQD26kD)~hD~pYK!TY8%
zMwdp<lo(QpDjTIAOuXp%`%kCl97EraFUn@c+!L4`ceAS3&bPxIci>q`;T0xx+s7qT
z`zr%RwDYNqO#(lRv-*C+41vI^iCh>|*?|+!8u}?ai=MZ28~TA2I}o3t#``xbI%h00
z>f8lp1ALwtkuBy_$cmp}oB&n`v+(yz!g+$QubzT;zxR}a-5w<wBPP%5X8YPx_kPJL
z1Q>o^Qh!AA@-pk}W2RF_!m_JQP8b+LBKtDi-`mm%7%|Y$lnfk5Lmy4Bl*GKUx=}Qp
zAE8EXxyxdHQ5q#Uzi8(3S}ZXT;suA6oJh$j{Bj~H2b6ftp2fvOb5|-*j=PmHHh1b{
z$<BQzH)@5>@hRuSw?XHYTn<)@d~eHyVs2i=DlHD*ZXUbdN4hvmIgd!HudgAjFJ(x3
z!QS0vjz({73WXk_1VT1*%C^0S-qOc%s3~0~@z}D&_Sg)YZwdqo)Nzn$S!~7hY1A~|
zk*+f(;)&(u9<Pz+lx|rZ`d@Y=K!Em!o-x^#3^<4!(Y>kYopt<MDM!@SwZDdV)8`1m
z*MdH*HxeXC43-2(7U2$nDZ6bY^m&VV^w$y&f87ni9#g8~U+F(3<3z~x@_gpfmu4z&
z`>|3;aiJ1lv%hYgd6Q8rc=UWwzK>OPD}{J5SxKB^a}%_xLjc(8iZECpTARQ|(9^|Y
zgeS!}j}wzg$V~7Vf;}{p!efpnJmyeKa6pbps_>)$HRSOY<<=;|pUOy8mjd4(PazIp
zg6WT?f;)P#lEutLNXgX|Wk2}%Kv!#5O5!~C(&ab;1}P!f`)_%VJPeSld%c5pYq#VY
z*>=2d#$%$qh+}`|#Qr%Sf#(%4iD3O$H+KNsHXBYqc=ng;t|IYELCQsC*sR>8s3c~B
z8qMEtBxJxPuiKl7s2djIXbDr4coCQI$5{oPRvHL(jzh-Z^%XQTwU#1`h0p|ZWud;B
z@vCXEs3<m_kupelQt-v11|cih<IgGoo#Ex?GUvEcJZvFThB~V!W{rrU7@mLw{GJR`
zn9<B@cBME7&OpyXABzD5)pF7uhE0h9HuAF(65Jp82)9!3_LsqVck3iuC5_3?AG9x3
z2F#AvDDKZld7DL*kA1a**@DviRs)SgV`WORRGE@jZ-Y}9@5yamNWQ~4i-Sf{s$G=*
zsvaDpm&enM9_MAfp~VKZS$5`3(#bJ@F~icPJpoyW(=1ac%9}0|?zGo?I`@qdd2VG#
z{rJ3c)VA-(#=4N|xa;*+9F9HIg$|b!yuO%0w@P@1-*&Ey{RGywn0wIe`g!i8Cqn~?
ze8*PsY0qkPcPyGs9Y0se6a1#B_rx25kdWRU6h`qB#boA0$ob={YnOs|1%cm4=5gqH
zo*>OLO-=4kRke~d5VPBRuf<r_|3Z<N4ttB$JqXQv@QRLTJZE0jHDH!hQU>eTZ@d25
zXtu!B*)0I=nC+)tu!EK*bKHip{sj>(zvC~75kpgcM~()J?g_R=o4-7LhP1lv6cU74
zrA;ledp`>==hJo5^J>kGRrk;}-6$#55H@h`@gkX{o?vD)A!YAqo@-&kQStk-+}nXd
zQ{=Fd+Y$hH#?Gqa2mb8mh!>S*Znj$_H6fJ1Wcuw~(p9^DwCXdSq$!ay>gus-<<ubH
z`#G%FmA+L9<dhY&Xaej4bxqFS#5L+2Gsn#B>#^|6m62VZ(FtbHm^&i&y3vJf2~|YI
zBut>Et``+aymnWj>SPltNzLA%`!47FVYd@}IXA*G#SN}Jfl1<gQ<rX6Y!W_anTRig
zo{UO0bnZ;ykXF2OnkL(oreT7sd+DFpbc_nvAI=4(yxH=lWq4+6$g=S+Ym7hM+^e$Z
z0)|vOB`Vbr9rJDmA28><le}qW0+!}<B)>e{n8Y&<z>+jLx!k?uu16Ok_=tTVO>H5}
z@@Fq`v)ZbzLlb1I@&bH1irjHgFiEtCN8OLfnc|v;Sw=Kv)Fi6zOi4_UOc7I@kBR|=
zC+ysuCQ!oJ<PO$(8e2VXUd&Jh<w7r4rt2>S2aNa&b0AA&z&|5^qA{l$8$;P_q#1Fa
zg*4K7{e^we%5raLBAAn0WqSLq_uRIdQm3ThI84x;_{d6xkc;x>V=3|v0u(*bABiox
zX<YE8XXgS$AZuCQ8HDGeebFU~qv(myO7hT59TTY{wYPn}+<fnD*<EN|({LQHbH(>R
z5<OXtk9<*9?L(;mk>Ch$%nGw#1QqIc1~}Txn-jiZVUc?6sR<oKlf}N_QI@QUe9+;)
zaJX19nXtPk>!DRFh(2X@_~#;sXVD}<>qWWJz*)$lSHh?`KDf0*d?1t;4tBx(f=U(c
zeM4;uXVRc?*e?KYG36p9r2jjeW%l0Wk$#M}tvlz0dcZi47s{=?kU3@j0<aBzH>l8h
zDj?1>BeI5Uz7-cUQG|1-q?6kyD{eFu<_i&HOLU$uPtQKhj$rr#655cYkt6lqmYaI*
zIl>anW{>?;kHTVl^*S7t<jq=S6R!i$a&ufVi>LCt$2(dL70ND&;>6Y^fsr$2V!M7_
z%0VqMDQJ%@5GSBOmP~)d43czO`>sW=S3gm#B;;67j6G+_k-}`Br7>P8Mj$;*TmEbr
zCsn`hG)|f6vAgANJtmkIW_GisJP0?ZkMg;u{JI=j5Is=|KGHkpV)WC2k$w={H6#xr
z1U+f(fmH}zdoUolB<BwYiD}2!tD#M4zWF%v*3Mlr3L=K6dJFGJ&EpOMpR`srn#yIL
z0X6|mx`5>O0~f$zg&Hs%!ROcK729lM?D%|#Ozjjhp;fJ^p@cE?TmIaCyFf}+woim%
z3MYWDSmrZ3k*%Ep2u%nXFg6vFwymmG;)TWK-{UnX9OhXCSKU)nZ9OX_X@WB+ro?s%
zy3(~jv=^3T%J<y|uB|8zZMz$Xkrva7{_1|RuM0@8{PiF>=*&YJzIOz6WKR%Qes7kA
zR?KS|+`5syvL?@z>RF#B&byFbw<<Mjq7Z4jQnJG@tqrur4??u0-+`ILDJ&$4=vv|(
zcUyCsAc2gBQ&thBYz#g?KMHjRB=Z7icUhxau!|k$wfIdLJ8LtW;KcGiI)yx}@5X)^
z@IDqNv5&Cdgus3w_>!pjxqe2Z!56v63+TF1k|6y9#Smbv*9=0hP7zIPWrDRREQuU3
zRZu#(rsoO^xiHfF1c?oMNRf1E)BPYzv&ik+RL|9Goekuc7Jq<HZczjxRF=xK@4fW*
zM1v<SDNYJG@JjGg>m*Enp#n?2-8Ss^lT<bejR_?7)0eaB`|inbfv&)!x#mX>NcHNP
zZcu#6$Te*u9Hk5xI%+GLBjdb`*USfB>EmY&h(CBE*tYV>9#W~@POqS|vZ>d>yRPN1
z3YkjbVeaey`<)e#c9)0Cd!N@F%dm!e-)r3k;hnVk^CgBs4LH64+HD~F=L0=GW{G{E
z9SoC9+NKnVkShT5*N~*CT}*!gxr@o0Gs0LYg>~P>`a$?i?3pZ<w>dVC^*wbI@J5<l
zFY+)0!@J|y(>uxZUo$~zsCp|fA%>}tskiJO?%kn>_>vC5QFzozXt=KSD^h_f8S<et
zJjGv=_GyCB%XX$M-L<{%Ri5**y6;(pcMY5dpBvz}me5(XcXXz2L3}k+n|vy3=9G1M
zh!D*}>H69{4tK!VK9%2VHQ)k@pjGgZ-Bp00z=rwUbat1DsVEdBu4>YG13ZCy0BPZw
zNsSl~_Yw{m+c-(*GbIhf{CnC)7)s^wTnT(2`ZC!2R@N(=qQIUhxL^u+>G<0}6^#4e
z-jrIQOD$(jOG=D;kR+4T^PftC|9B#$80}zC{!|W<JPE68o@7t9W9;^)a+KqlWXEJ%
zdNVXxoJ`_}K2aKHKJ?6=8{Gld<*AE5{(}sN+N>ZTN?(##!l9rt5JgP2PaFVC8n3jn
zGu{{wmh?H|;00@-`Kw59+-^$Lezz9(Ru;^$qUj+#Py$y?scY{xLAB~nP4Xz3QlbFv
zB#i-})JxPZ{=uhEbhlb7jg}Tm+?hbj`(9Evi^%>onuJe`jdN!}zOA57i2{QOVm6I-
z^b&ol`$v&beqRseYE?sp8ERvPHDNHfc$8w1QjXn}@+-+Vu^5+}I+W9z;&u$OyAh$e
z_hdM#KO;MvTZOkd-H2{@>O_(a90WdP8B%SP8*Cl^nL=zm<A`hDt%qumOlAPpfWf&z
zxCZPzFK`X01ns9SyJKde#_6gfkd0YnSS9O2Z->XqY%8^bq+CbC3GZEs6Q7Cu{Q9V`
zr6le#yH0<Y0D#0_UpEER>yGWZ$M2sPa|ddwE(4K4vKKtp%ghK@ldqY(yS(k~6vne2
z5$40Gf}Q{o0m+vAn5m>U_OgR;Ih~@<8+$&d%5pfPa2j)LikPGh%nPd>j*8F~Km?rI
z{lvl?vH%DB5n`W0{QUZ)YrDJq5hBo?xhyC;_(%BScROL$hw1*GC&d7-Y>hIa>X1o#
z+UiUKPJ4x6a67UHoZ;0|X##Pg=*<a)tEO(JZkYv)GR)fN*90OyP4k#qOzlPtDAJ9@
z;wz4X`-vBe?3-d{e6a^wo9x4WGxnGeQ;#N){=94mWS4XnmZ?ajFa^^BXTDy#xul8q
zbhd&=6sG4Eb#(_b#yCQssFhUo{HIxgv#!(7XMKpc&&*}^XOZ8Q0XUKT10>!8=4Zps
z0${{IV-i~!?&yd8Y`1xErTY#N9!B+@a4V$w6u>qz1&Rhbn?H&i)52m`GC(p+)MI@|
zba*c)sXUZAoH@(X3bQ$rF*Kbb<vK3cRSyrR<^g{0t3gs!2pC<n@*)LbKt(nU7<U9Y
z1KCxlGoT@Mj3B3ydTXA#A!El-_s`^rmACA0P;x8bc1eb?IC21Z@EM1-8Y{zI#e1}%
z&51vR7Z&>Y^<rddj*L^6W%lW!5U0?|y)~T6QXVatbm{=pW%DM+0{@=QQCb}Bhr~Zf
zC#7AMV-vjXnw7SE`A$)u2gA2qAUyhO-5Ehb@=LP5bUlm|$GoNS5oRwHhxH=3;CqM7
z8m8U!En<?OjTMF%7%5*+wCSYuSW~lHmhFh`e=^PjFZyfUd}lF|K5^KmEP|N~9A9=~
z{7-9219zD{blqsRUHq!7fA{yD!;cZaSXwXVhSnB05xZ8uAnQX}t~vn6bd5n6`C?In
z*sVRDHsNg+!RPO#naszSCVQUfLCr$}5&<{LiukbQDILW8iko{iOMoX{^()E%cPEQ7
zTuFtC`8QJTyZHbf*%pzndEv5rY_uv}Y^vMNV}_U-+cCT7vMO)78<L_H#ETXHF9bgc
zvp)`Rb%sreUZAQL&aXT86Pv1R8bfpZhFIPs)_%u4^4=*<T48FSvH*yfQG4ME{f#k5
z*ZndH=S}hbc!?B&+C;fO&Qd<}bz=49_~S>tGkXlU#aT32RlclNT?_e`Hw4n_9C906
zo=&D>E~{q4!aGUUEqj+`yRlQ`vn5Y6%v@IeN@vOevuUIW?J;D2mh=eaP{IuuIf}Dv
z>7%J(JEDJag0Ko<^Q8F5fg)B}jhAO;=vpVDEM_Ozq#%_#utH!*2B5J3F^G~*Jd%0?
zd9<hh_NxA|@|SnqN2DF6D414Kd^|4*XhHWd^7bThA@an7j;5X7s=m(rDSw`hi(o!^
zMI20ItfcptGOF#skAftP^{N1y>>oHB?Np0I2s0K?sD~Dt3I?@xMx{>6+x<`*bnR*G
z2h5oFcox&_ZbFa|8$r<hsoa#!WKMH7(Ut@!i$%w((So^4D=3n)|7FU0eO(W$?f*G8
z#lp6z=@+j~Aus7<^)8!k1BpB)i?8yP8DAH71)zl;sE5mL)whb7fRn|F7o|Y#9YKsC
z;Y;Uh<^Tyh`ev`nrNWRLI@EIXMRwpwuF7Ez<YTH$^Spu#GBat{><@!Q*RYu4&Qn>1
zh&i*^kqo)gWL%Z?y{Gv<kLS}hj;pF#5(t3p(WtaZoJ6@-1)~f#=lcsyMkMs+`X(@|
z-*3TF6g7N73iQFTT1=7Tvvf+4T-7%f^=1sPQ4|W_bH*z^@m%J*|Fuhq*));<ij32~
z*5r`IFG;|u<BoIxA{mloe7w^h#q33bXa|U%;PR>KH&zqRT)5(XvCMhWg6sSGu^Vwp
zDi1No0W-+p^0%AqrtE{)3&2Sm^M=%H&7b-DS9QB7weNx`dwn+j4hwGxaqn3K;<|pR
zqkcQx?X#ZgoZE&%aELg#)T??>$#`DoVyf8F0F&e?I*$H;LQ5Hx7-#VE^CZEjrNL5k
zq)9ci`i2TiP!!EwQ3m(tVX*hEFK;SDPt}_(64e<sg0$GZhCV>u9d}_!t=ZS8u-HG|
z|MPG^-N#Aildkhsw_aD9$7(5JyN~U-g!G9g1^^^cGHI%>x`B4-Er9!ngY68rMs)TU
zaE!4vDfX*(B@HYn_72JdnO1MJmkS$|v_1Z8GNW0yZfqU@Om@U4GU<2j%X>UG4tSMC
zPu7fAT1&p_#-TE+fYxM$P{##HHlH>J<8Uv}F$T6m1sVq=Qe%qYaD>@xK2=V>+Df#F
zze5cm|G8CHFz|y~z*J>oH&$;^%4rUVYd5hT=S4Ja<MnSj$%+%Tv7g72R0;5F@kyW?
zn|YkW;5a`_LcT-7vZ?hLs||}?o?H!Ai1w7in<iy4x~F>cL-`0UIr#Zy5M{F&?^N>~
ziA7m$?u~vV_P>wJO)y4cr~<JwXvm(2$=qTk`HmP{Q4qBW%x8RB3TPqd^ULSn)=g8C
zA3}rbaHqJuu;d9elNpG<=!tmqOV`4fBBs9ixcT1{chdDxe-3y8=k-a~G~3s|DRZ%@
znx=H4?iWb{Ol>#b0U^r~Vh(@xJNEmYIW{ksL6jF|etfwgl;HUI(VO`Tqzpog=vUwy
zWLNp0Cy);oB~?fhxhvzdBF>TYFY(zOCS~=#YWSQQO76ueU1(ZEM-t40hXRW3u|iCw
zrfJ|Y`?soZNz9iRP$p+u43V@hmE|zdTB03_y;+Q<h`jKtZvA_^UGb&A7u?q`k$~H)
zJB0<53Jq8V)9Z}g6`V(xm%%)Pa4*XDi#M;{<*)2Qwj~t~lo(;n&J;ATX+{Ih9$-x`
zL7(KsODO>C$Zq7Og2~a;)p^kiJV@|UV`RNlw?kGy3QqGwn6o$*<p@V#=Ch|%eXQLb
zc9{e<m(Sl>luxROUQe^KTNGkVCzr;v*l7TosY2r7rUdX8Ney^%0=%wS_`=4;a(kl^
zX<w%3qT(H?xWzrdFWG4rwm)&4!lW?i`RMVzud2SOTe;02U=CCHu&F8O<6M8UV!q>>
z-Ggb7IWxV>yV}$}lcdSB)vdByQ|UaCiWOaI-e&E^J(NDhPRS8!6YZo$I-lrH0YUYe
zc}9aa7fv|E{i1*I{x(0KAj;`W=JTqa<j*E4sUj!JcO<8njBcZv<o)<9=-=7#5UNae
zHo`C%oy-aKDa>}XZZQd+ENgPY9vk>EQB;{tPb67<?+h9jAGT0sanbce$Jn5}qG<ZX
zs_X8w)5x6TlGI((H;Eu{4#7E&3u9`XUj*7>fTjQjl4j9fAWG=kYhwuf{1+tjclE|Q
zts;JjFs@>aFNgC4(`O5y88G~2(BXWKa|OykQp-BCnSL(6Vedt|7B)A2p!*a)9A$Jj
z-J_bRyTImo?3TQ7c>v4{JlUJISOegpC8WUa<%_Z~*-<QC=xMGFR#k+}X_`lqx_iCV
zDFv{z{yDp8;&%AZB-^69@0yL;-Kv)mH#sCh;IHUOR~BIwaf%G5XOW0sSG^(+7klpZ
z!%0Jq&9}2G^rsp?-a`dLTE&s=y2dFZql`q%*z=31TZJ&-KtXe6ufZz<0d78@gQGqn
zj)i>w9$GpJWJezcCxp=>lc)&F36ad{S+e|4;25ejMZtO-{Fm)<dCyeCU=%MUxQ)jj
z{MuxGKVQjIiJjo8YkcT?Bo51x*|gnc-%N_K4tuUiL5h$&6w=|!Bl3G_3)cJ0kGosG
z40n_p7f?)<EQ4y!%oJ|D$e^KTzCUxJ=8=>|F#e>lGhy;9))k`~$i&r+_XJQvrCG8=
zn@}hI3#d3>wfK#o%`QUV5skwA6oe@+-Xl*ab}$V<QPwK4K9aInG(-J#0oXxw@%2ba
zK7H9y%Q6dT>Sa?u7$9YkyDp9J6tyGqZc^_D%Izi$qj_;LZmRoI!tRCIAgvRb=P?U+
z7PXWnh_1*}>N=Q$1{4x}?>&L8h~T*DDM?i%GXz;OGl8;}4KPHRGlb;BzziXtf%NuZ
zePNNP>g{|tw!m-yeyC|=qk=yWl~|(K0|mx@1C8!A^VF!$2yEP^?xRj(C)*trxq3SM
zIjLO^L%H-6t3ysBA94&zjCJLV63get6AGaurnofnM(Dyr$Gk-;=@sFNCgObd%wK;2
z4(-u#A$U#LmwBeUVhy1Cf*r_J@cIkf`qFH~%q2p{vpcD^J7iLf6b?P@_280SF@qgW
z1}a~8_;%BGx5`uLZ|cXgv9}vpPf{<FO61BJw4h+`ZNp3yByXXSIQVv7Zz^wdj<bMC
zz5JzKnay7=`p|}fC6fuVFHj~F1)uKDgQ@^7CFvo)dJDJQihMD-Ffk5=y3bQRzbIm@
zZZ?Pg;iT)5BuNcouXJvEZ&|2i_1B8c5aDRHE0_{$goy@5F6UOpop6^u6_;cpa*puV
zEtzXIkz5jQWb&z+M&V00-pU~(pURa>P95jzROtzyV_XxsGS55@PAalc9me5KQUWQC
zv{GwPI>vmNFt{WqW&gyy_C3h4R#IjF&>9Cp9{)@<?AplgoUH3_zE=;_;m!4-Om~o=
zF&W?0yY9>pq!BlMGs-qdfsIVmN+YWG&Z^~8Z6J;EL`h05mUHtsJmMI0I7~h1L*~#S
zHw}j00aKSV>>>7I4*Vj&l?_B{X1O-n&vwd;CS<}L8le~v95vg9&e!9&I-ZmmL*T$=
zeq}P>^9k6B2o4DHj4u+t5!14=0<Px-$1o8ktEwU=S<z!69L;Bo8Z24C-c>g+eOO@^
zNEy8A9!nOgI-I*dUj_E_qH0#vb+wWWcK3Btc{m+WQZYe{{1r?jboecGJ&MmQvQm{g
zD$ELqxpqkV2E33$K{3tE9PHz5U;7%y;mLN&YN2{44IA^stnuPh&@9;{h&@9;ZU+U8
z<(|Jjc7+xMxjj!-NQk*)(Zfm?a0TJBJ^v$`32r@*m*PMFRG?eBU{PJ7q$JOl0{e8j
zWG7)sWJUQeJkg-0JnfyXv5kkTc>E979ka-l3Gq)NiJhHz;s{n@hFqtiaXh&v4V}#_
z1`pd?{|a-T8y!E`!vJJH8M^fI>CVKjl{t$VM|B*sXb%1YLv$^LqJxt(+$FYq)CG<S
z){|^UvE^G?HH!88@CNvBDvuDh&4qbfUmdoar>cK=6C72|T|KUYKau|z6JnpwUP<x&
zj%v7g!+A2D6#E8T6>?mih(va>1J7rilf_qe{#IRI`)T5x3@J8@Oy=3XRqYRXCeCZf
z1?!?x67Q2zV3C>XBA(CW0sVGgUMucHRp<w880^95;<iE(`s@0SMOj`O$A6|mpqWis
zOTCeA^|HFt+sAhAPZiA>3XX75hdC*!S#MphptVl!pJx;+;?Y7`yeOPetVxu5h4~-=
z5fzm?qSQTSr(speP>(w#d!nXRRm;JLFoc*Iflt17Y#Mn)4$B_M>tC}^5PYlP&wYIW
zyrNckVRtCaNHaPoPSLs-)3om5z)PGoPV`1hFMv?{-kqn0%`<5}eSQsr=he{E<kl-B
zRF>ureLMYvpcuPH@p>@#&#jG_7s{L-V?0p*L&no4TcpkrMw#0p%w%d2L$#*kx|;go
z!Kqwu5|I=;eWxB}v#B20m%o!Y{1_N=h+5Q+JiFghqaglM4%d||KI0s!n!c1|*qPh&
z%Zx~c2$9pQYNi~3D53VVyhYixX!>*jx0InXvfU9Io@SaWC7#(i78aIMDYX#tt^DDQ
z5et8gM*MpEA#@W%A>KpvGZ7j8Ew8&#6I(LR6t`Jh(S5ZadFJj%63_gSM4re^lKAD2
z=UJ6{($z)zz#P)0IUzjPo0~b%$s|nuqCT7CA%)5Xgp>RG-x{WGt1H3WHT`>qV#>mq
Sn6O};;r|EmyHt!9%>e)hl&`b^

literal 0
HcmV?d00001


From f162cd449bc8cacc64d3dc236ca6ddaa2530ac46 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Wed, 22 May 2019 11:49:12 -0400
Subject: [PATCH 04/39] ENH Uses categories from arff file

---
 .../compose/plot_column_transformer_mixed_types.py   |  2 +-
 sklearn/datasets/openml.py                           | 12 +++++++-----
 sklearn/utils/__init__.py                            |  1 +
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 19651cd7cf622..9233e298d9fce 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -37,7 +37,7 @@
 np.random.seed(0)
 
 # Read data from Titanic dataset.
-titantic = fetch_openml(data_id=40945, return_frame=False)
+titantic = fetch_openml(data_id=40945, return_frame=True)
 data = titantic.data
 
 # We will train our classifier with the following features:
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 44a0b89c188d4..d8991012d7a66 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -300,18 +300,20 @@ def _convert_arff_data_dataframe(arrf_data, all_columns, features_dict):
     -------
     df : pd.DataFrame
     """
-    check_pandas_support('fetch_openml with return_frame=True')
-    import pandas as pd
-
-    df = pd.DataFrame(arrf_data['data'], columns=list(features_dict.keys()),
-                      dtype=object)
+    pd = check_pandas_support('fetch_openml with return_frame=True')
+    df = pd.DataFrame.from_records(arrf_data['data'],
+                                   columns=list(features_dict.keys()))
     df = df[all_columns].copy()
 
+    attributes = dict(arrf_data['attributes'])
+
     dtypes = {}
     for column in all_columns:
         dtype = _feature_to_dtype(features_dict[column])
         if dtype == object:
             continue
+        if dtype == 'category':
+            dtype = pd.CategoricalDtype(attributes[column])
         dtypes[column] = dtype
 
     return df.astype(dtypes)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 8d9e55f5e6df1..3b94bd85f08d0 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -840,6 +840,7 @@ def check_pandas_support(caller_name):
     """
     try:
         import pandas  # noqa
+        return pandas
     except ImportError as e:
         raise ImportError(
             "{} requires pandas. You can install pandas with "

From f3818f12b8f9430d07b4a95d8c981b8a86c178bf Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Wed, 22 May 2019 15:05:56 -0400
Subject: [PATCH 05/39] STY Fix

---
 sklearn/datasets/tests/test_openml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 034d78f144753..76f368676b598 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -350,8 +350,8 @@ def test_fetch_openml_cpu_pandas(monkeypatch):
                                      'perkin-elmer', 'prime', 'siemens',
                                      'sperry', 'sratus', 'wang'])
     expected_dtypes = [cat_dtype] + [np.float64] * 7
-    expected_feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN',
-                              'CHMAX']
+    expected_feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH',
+                              'CHMIN', 'CHMAX']
     expected_target_names = ['class']
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)

From 61c3dea5b601d11ec129bbceb4a384ca014e84e3 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 24 May 2019 13:35:03 -0400
Subject: [PATCH 06/39] ENH Adds nrows for chunking

---
 sklearn/datasets/openml.py            | 122 +++++++++++++++++++-------
 sklearn/datasets/tests/test_openml.py |  31 ++++---
 2 files changed, 109 insertions(+), 44 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index d8991012d7a66..6745fcf7e44b3 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -9,6 +9,7 @@
 import itertools
 from collections.abc import Generator
 from collections import OrderedDict
+from itertools import zip_longest
 
 from urllib.request import urlopen, Request
 
@@ -268,55 +269,96 @@ def _convert_arff_data(arff_data, col_slice_x, col_slice_y, shape=None):
 def _feature_to_dtype(feature):
     """Map feature to dtype for pandas DataFrame
     """
-    if feature["data_type"] == "string":
+    if feature['data_type'] == 'string':
         return object
-    elif feature["data_type"] == "nominal":
+    elif feature['data_type'] == 'nominal':
         return 'category'
     # only numeric, integer, real are left
-    elif (feature["number_of_missing_values"] != "0" or
-          feature["data_type"] in ["numeric", "real"]):
+    elif (feature['number_of_missing_values'] != '0' or
+          feature['data_type'] in ['numeric', 'real']):
+        # cast to floats when there are any missing values
         return np.float64
-    elif feature["data_type"] == "integer":
+    elif feature['data_type'] == 'integer':
         return np.int64
-    raise ValueError("Unsupported feature: {}".format(feature))
+    raise ValueError('Unsupported feature: {}'.format(feature))
 
 
-def _convert_arff_data_dataframe(arrf_data, all_columns, features_dict):
+def _chunk_iterable(seq, chunksize):
+
+    pad_value = '__PADDING__'
+
+    args = [iter(seq)] * chunksize
+    it = zip_longest(*args, fillvalue=pad_value)
+    try:
+        prev = next(it)
+    except StopIteration:
+        # Nothing to iterate
+        return
+
+    # yield everything except the final value
+    for item in it:
+        yield prev
+        prev = item
+
+    # handle final value
+    if prev[-1] is pad_value:
+        # uses binary search to find the final index
+        lo, hi = 0, chunksize
+        while lo < hi:
+            mid = (lo + hi) // 2
+            if prev[mid] is pad_value:
+                hi = mid
+            else:
+                lo = mid + 1
+        yield prev[:lo]
+    else:
+        # no padding needed
+        yield prev
+
+
+def _convert_arff_data_dataframe(arrf, columns, features_dict, nrows):
     """Convert the ARFF object into a pandas DataFrame.
 
     Parameters
     ----------
-    arff_data : list or dict
-        as obtained from liac-arff object
+    arrf : dict
+        As obtained from liac-arff object.
 
-    all_columns : list
-        columns to return
+    columns : list
+        Columns to return.
 
     features_dict : OrderedDict
-        map from feature to feature info from openml. This includes
-        columns that are not ignored.
+        Maps feature name to feature info from openml.
+
+    nrows : int
+        Number of rows to read at a time.
 
     Returns
     -------
-    df : pd.DataFrame
+    dataframe : pandas DataFrame
     """
     pd = check_pandas_support('fetch_openml with return_frame=True')
-    df = pd.DataFrame.from_records(arrf_data['data'],
-                                   columns=list(features_dict.keys()))
-    df = df[all_columns].copy()
 
-    attributes = dict(arrf_data['attributes'])
+    attributes = dict(arrf['attributes'])
+    arrf_columns = list(attributes)
+
+    arrf_data_gen = _chunk_iterable(arrf['data'], nrows)
+    dfs = [pd.DataFrame(list(data), columns=arrf_columns)
+           for data in arrf_data_gen]
+    df = pd.concat(dfs, copy=False)
+
+    columns_to_keep = [col for col in arrf_columns if col in columns]
 
-    dtypes = {}
-    for column in all_columns:
+    # copy dataframe when there are columns that need to be removed
+    if len(columns_to_keep) != len(arrf_columns):
+        df = df[columns_to_keep].copy()
+
+    for column in columns_to_keep:
         dtype = _feature_to_dtype(features_dict[column])
-        if dtype == object:
-            continue
         if dtype == 'category':
             dtype = pd.CategoricalDtype(attributes[column])
-        dtypes[column] = dtype
-
-    return df.astype(dtypes)
+        df[column] = df[column].astype(dtype, copy=False)
+    return df
 
 
 def _get_data_info_by_name(name, version, data_home):
@@ -493,7 +535,7 @@ def _valid_data_column_names(features_list, target_columns):
 
 def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                  target_column='default-target', cache=True, return_X_y=False,
-                 return_frame=False):
+                 return_frame=False, nrows=10000):
     """Fetch dataset from openml by name or dataset id.
 
     Datasets are uniquely identified by either an integer ID or by a
@@ -550,19 +592,27 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         If True, returns a Bunch where the data attribute is a pandas
         DataFrame.
 
+    nrows : int, default=10000
+        Number of rows to read at a time when constructing a dataframe.
+        Only used when ``return_frame`` is True.
+
     Returns
     -------
 
     data : Bunch
         Dictionary-like object, with attributes:
 
-        data : np.array, scipy.sparse.csr_matrix of floats, or pandas Dataframe
+        data : np.array, scipy.sparse.csr_matrix of floats, or None
             The feature matrix. Categorical features are encoded as ordinals.
-            If ``return_frame`` is True, this is a pandas DataFrame.
+            If ``return_frame`` is True, this is None.
         target : np.array or None
             The regression target or classification labels, if applicable.
             Dtype is float if numeric, and object if categorical.
             If ``return_frame`` is True, this is None.
+        dataframe : pandas DataFrame
+            The pandas DataFrame that includes the data and the target.
+            Use ``feature_names`` and ``target_names`` to separate the target
+            from the features. If ``return_frame`` is False, this is None.
         DESCR : str
             The full description of the dataset
         feature_names : list
@@ -638,8 +688,12 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     if data_description['format'].lower() == 'sparse_arff':
         return_sparse = True
 
-    if return_sparse and return_frame:
-        raise ValueError('Cannot return dataframe with sparse data')
+    if return_frame:
+        if return_sparse:
+            raise ValueError('Cannot return dataframe with sparse data')
+        if return_X_y:
+            raise ValueError('return_X_y=True can not be set when '
+                             'return_frame=True')
 
     # download data features, meta-info about column types
     features_list = _get_data_features(data_id, data_home)
@@ -710,9 +764,11 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         data_description.pop('description'))
 
     if return_frame:
-        all_columns = data_columns + target_column
-        df = _convert_arff_data_dataframe(arff, all_columns, features_dict)
-        return Bunch(data=df, target=None, feature_names=data_columns,
+        columns = data_columns + target_column
+        df = _convert_arff_data_dataframe(arff, columns, features_dict, nrows)
+
+        return Bunch(dataframe=df, data=None, target=None,
+                     feature_names=data_columns,
                      target_names=target_column, DESCR=description,
                      details=data_description, categories=None,
                      url="https://www.openml.org/d/{}".format(data_id))
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 76f368676b598..fcf6c851cf582 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -300,7 +300,7 @@ def test_fetch_openml_iris_pandas(monkeypatch):
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.data
+    df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)
     assert np.all(df.dtypes == expected_dtypes)
@@ -323,7 +323,7 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
 
     bunch = fetch_openml(data_id=data_id, return_frame=True,
                          target_column=target_column, cache=False)
-    df = bunch.data
+    df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
@@ -356,7 +356,7 @@ def test_fetch_openml_cpu_pandas(monkeypatch):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.data
+    df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
@@ -376,6 +376,17 @@ def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
         fetch_openml(data_id=data_id, return_frame=True, cache=False)
 
 
+def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
+    data_id =  1119
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+
+    msg = 'return_X_y=True can not be set when return_frame=True'
+    with pytest.raises(ValueError, match=msg):
+        fetch_openml(data_id=data_id, return_frame=True, cache=False,
+                     return_X_y=True)
+
+
 def test_fetch_openml_adultcensus_pandas(monkeypatch):
     pd = pytest.importorskip('pandas')
     # Check because of the numeric row attribute (issue #12329)
@@ -386,7 +397,7 @@ def test_fetch_openml_adultcensus_pandas(monkeypatch):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.data
+    df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
@@ -409,8 +420,7 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-
-    df = bunch.data
+    df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
@@ -435,8 +445,7 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False,
                          target_column=target_column)
-
-    df = bunch.data
+    df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
@@ -470,9 +479,9 @@ def test_fetch_openml_titanic_pandas(monkeypatch):
         'home.dest': object,
         'survived': pd.CategoricalDtype(['0', '1'])
     }
-    expected_columns = ['pclass', 'name', 'sex', 'age', 'sibsp',
+    expected_columns = ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp',
                         'parch', 'ticket', 'fare', 'cabin', 'embarked',
-                        'boat', 'body', 'home.dest', 'survived']
+                        'boat', 'body', 'home.dest']
     expected_dtypes = [name_to_dtype[col] for col in expected_columns]
     expected_feature_names = ['pclass', 'name', 'sex', 'age', 'sibsp',
                               'parch', 'ticket', 'fare', 'cabin', 'embarked',
@@ -481,7 +490,7 @@ def test_fetch_openml_titanic_pandas(monkeypatch):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
     bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.data
+    df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape

From 052491f9cfb4a0bc3af660e81ed04cd1b91cd6cf Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 24 May 2019 13:38:55 -0400
Subject: [PATCH 07/39] STY Fix

---
 sklearn/datasets/tests/test_openml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index fcf6c851cf582..9e67cb5e8f503 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -377,7 +377,7 @@ def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
 
 
 def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
-    data_id =  1119
+    data_id = 1119
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 

From e7a6f9c094a73d2019af17ec08782e7baa2d14b5 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 24 May 2019 13:59:15 -0400
Subject: [PATCH 08/39] DOC Adds more comments

---
 sklearn/datasets/openml.py | 4 +++-
 sklearn/utils/__init__.py  | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 6745fcf7e44b3..b242310484ade 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -284,6 +284,8 @@ def _feature_to_dtype(feature):
 
 
 def _chunk_iterable(seq, chunksize):
+    """Chunk ``seq`` into tuples of length ``chunksize``. The last chunk may
+    have a length less than ``chunksize``."""
 
     pad_value = '__PADDING__'
 
@@ -325,7 +327,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, nrows):
         As obtained from liac-arff object.
 
     columns : list
-        Columns to return.
+        Columns from dataframe to return.
 
     features_dict : OrderedDict
         Maps feature name to feature info from openml.
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 3b94bd85f08d0..b1b246b2b22b5 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -843,6 +843,5 @@ def check_pandas_support(caller_name):
         return pandas
     except ImportError as e:
         raise ImportError(
-            "{} requires pandas. You can install pandas with "
-            "`pip install pandas`".format(caller_name)
+            "{} requires pandas.".format(caller_name)
         ) from e

From 95b4153763348f449aedb8556b2f68a9d4225cd2 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 24 May 2019 14:00:00 -0400
Subject: [PATCH 09/39] DOC Fixes example

---
 examples/compose/plot_column_transformer_mixed_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 9233e298d9fce..87a71e51f822b 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -38,7 +38,7 @@
 
 # Read data from Titanic dataset.
 titantic = fetch_openml(data_id=40945, return_frame=True)
-data = titantic.data
+data = titantic.dataframe
 
 # We will train our classifier with the following features:
 # Numeric Features:

From 6c8c7097326b39c53791c376db1b1fa8536d4bc7 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 24 May 2019 15:13:09 -0400
Subject: [PATCH 10/39] ENH Uses object types when loading into dataframe

---
 sklearn/datasets/openml.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index b242310484ade..7272d6072e2ce 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -345,7 +345,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, nrows):
     arrf_columns = list(attributes)
 
     arrf_data_gen = _chunk_iterable(arrf['data'], nrows)
-    dfs = [pd.DataFrame(list(data), columns=arrf_columns)
+    dfs = [pd.DataFrame(list(data), columns=arrf_columns, dtype=object)
            for data in arrf_data_gen]
     df = pd.concat(dfs, copy=False)
 
@@ -594,7 +594,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         If True, returns a Bunch where the data attribute is a pandas
         DataFrame.
 
-    nrows : int, default=10000
+    nrows : int, default=5000
         Number of rows to read at a time when constructing a dataframe.
         Only used when ``return_frame`` is True.
 
@@ -728,8 +728,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                                             target_column)
 
     # prepare which columns and data types should be returned for the X and y
-    features_dict = OrderedDict([(feature['name'], feature)
-                                for feature in features_list])
+    features_dict = {feature['name']: feature for feature in features_list}
 
     # XXX: col_slice_y should be all nominal or all numeric
     _verify_target_data_type(features_dict, target_column)

From 26b03b219b80df27de74c153265f35a766093374 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 24 May 2019 15:40:22 -0400
Subject: [PATCH 11/39] CLN Address comments

---
 sklearn/datasets/openml.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 7272d6072e2ce..9af5d68cadcaa 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -318,7 +318,7 @@ def _chunk_iterable(seq, chunksize):
         yield prev
 
 
-def _convert_arff_data_dataframe(arrf, columns, features_dict, nrows):
+def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     """Convert the ARFF object into a pandas DataFrame.
 
     Parameters
@@ -332,7 +332,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, nrows):
     features_dict : OrderedDict
         Maps feature name to feature info from openml.
 
-    nrows : int
+    chunksize : int
         Number of rows to read at a time.
 
     Returns
@@ -344,10 +344,10 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, nrows):
     attributes = dict(arrf['attributes'])
     arrf_columns = list(attributes)
 
-    arrf_data_gen = _chunk_iterable(arrf['data'], nrows)
-    dfs = [pd.DataFrame(list(data), columns=arrf_columns, dtype=object)
+    arrf_data_gen = _chunk_iterable(arrf['data'], chunksize)
+    dfs = [pd.DataFrame(list(data), columns=arrf_columns)
            for data in arrf_data_gen]
-    df = pd.concat(dfs, copy=False)
+    df = pd.concat(dfs)
 
     columns_to_keep = [col for col in arrf_columns if col in columns]
 
@@ -537,7 +537,7 @@ def _valid_data_column_names(features_list, target_columns):
 
 def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                  target_column='default-target', cache=True, return_X_y=False,
-                 return_frame=False, nrows=10000):
+                 return_frame=False, chunksize=5000):
     """Fetch dataset from openml by name or dataset id.
 
     Datasets are uniquely identified by either an integer ID or by a
@@ -594,7 +594,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         If True, returns a Bunch where the data attribute is a pandas
         DataFrame.
 
-    nrows : int, default=5000
+    chunksize : int, default=5000
         Number of rows to read at a time when constructing a dataframe.
         Only used when ``return_frame`` is True.
 
@@ -766,7 +766,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     if return_frame:
         columns = data_columns + target_column
-        df = _convert_arff_data_dataframe(arff, columns, features_dict, nrows)
+        df = _convert_arff_data_dataframe(arff, columns, features_dict,
+                                          chunksize)
 
         return Bunch(dataframe=df, data=None, target=None,
                      feature_names=data_columns,

From f5a60bd7251b1c32f6fadde0392264bf22d6678d Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Fri, 24 May 2019 17:31:59 -0400
Subject: [PATCH 12/39] STY Fix

---
 sklearn/datasets/openml.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 9af5d68cadcaa..58d4ba79a49be 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -8,7 +8,6 @@
 from functools import wraps
 import itertools
 from collections.abc import Generator
-from collections import OrderedDict
 from itertools import zip_longest
 
 from urllib.request import urlopen, Request
@@ -329,7 +328,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     columns : list
         Columns from dataframe to return.
 
-    features_dict : OrderedDict
+    features_dict : dict
         Maps feature name to feature info from openml.
 
     chunksize : int

From 599666fd4c147e686fe6c62e41fc50e847c21baa Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Sat, 25 May 2019 16:39:20 -0400
Subject: [PATCH 13/39] TST Fix pandas test

---
 sklearn/datasets/openml.py            |  5 +--
 sklearn/datasets/tests/test_openml.py | 44 ++++++++++++++++-----------
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 58d4ba79a49be..5b9eea939e0b3 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -9,6 +9,7 @@
 import itertools
 from collections.abc import Generator
 from itertools import zip_longest
+from collections import OrderedDict
 
 from urllib.request import urlopen, Request
 
@@ -340,7 +341,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     """
     pd = check_pandas_support('fetch_openml with return_frame=True')
 
-    attributes = dict(arrf['attributes'])
+    attributes = OrderedDict(arrf['attributes'])
     arrf_columns = list(attributes)
 
     arrf_data_gen = _chunk_iterable(arrf['data'], chunksize)
@@ -357,7 +358,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     for column in columns_to_keep:
         dtype = _feature_to_dtype(features_dict[column])
         if dtype == 'category':
-            dtype = pd.CategoricalDtype(attributes[column])
+            dtype = pd.api.types.CategoricalDtype(attributes[column])
         df[column] = df[column].astype(dtype, copy=False)
     return df
 
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 9e67cb5e8f503..990dd08eda2b2 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -286,11 +286,12 @@ def test_feature_to_dtype_error(feature):
 def test_fetch_openml_iris_pandas(monkeypatch):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 61
     expected_shape = (150, 5)
 
-    cat_dtype = pd.CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
-                                     'Iris-virginica'])
+    cat_dtype = CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
+                                  'Iris-virginica'])
     expected_dtypes = [np.float64] * 4 + [cat_dtype]
     expected_feature_names = ['sepallength', 'sepalwidth', 'petallength',
                               'petalwidth']
@@ -313,6 +314,8 @@ def test_fetch_openml_iris_pandas(monkeypatch):
 def test_fetch_openml_anneal_pandas(monkeypatch):
     # classification dataset with numeric and categorical columns
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
+
     data_id = 2
     target_column = 'class'
     expected_shape = (11, 39)
@@ -328,7 +331,7 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -338,17 +341,18 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
 def test_fetch_openml_cpu_pandas(monkeypatch):
     # regression dataset with numeric and categorical columns
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 561
     expected_shape = (209, 8)
 
-    cat_dtype = pd.CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
-                                     'bti', 'burroughs', 'c.r.d', 'cdc',
-                                     'cambex', 'dec', 'dg', 'formation',
-                                     'four-phase', 'gould', 'hp', 'harris',
-                                     'honeywell', 'ibm', 'ipl', 'magnuson',
-                                     'microdata', 'nas', 'ncr', 'nixdorf',
-                                     'perkin-elmer', 'prime', 'siemens',
-                                     'sperry', 'sratus', 'wang'])
+    cat_dtype = CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
+                                  'bti', 'burroughs', 'c.r.d', 'cdc',
+                                  'cambex', 'dec', 'dg', 'formation',
+                                  'four-phase', 'gould', 'hp', 'harris',
+                                  'honeywell', 'ibm', 'ipl', 'magnuson',
+                                  'microdata', 'nas', 'ncr', 'nixdorf',
+                                  'perkin-elmer', 'prime', 'siemens',
+                                  'sperry', 'sratus', 'wang'])
     expected_dtypes = [cat_dtype] + [np.float64] * 7
     expected_feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH',
                               'CHMIN', 'CHMAX']
@@ -389,6 +393,8 @@ def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
 
 def test_fetch_openml_adultcensus_pandas(monkeypatch):
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
+
     # Check because of the numeric row attribute (issue #12329)
     data_id = 1119
     expected_shape = (10, 15)
@@ -402,7 +408,7 @@ def test_fetch_openml_adultcensus_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -413,6 +419,8 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     # and ignore attributes. Note that data_features json has 82 attributes,
     # and row id (1), ignore attributes (3) have been removed.
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
+
     data_id = 40966
     expected_shape = (7, 78)
     expected_floats = 77
@@ -425,7 +433,7 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -434,6 +442,7 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
 def test_fetch_openml_emotions_pandas(monkeypatch):
     # classification dataset with multiple targets (natively)
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40589
     target_column = ['amazed.suprised', 'happy.pleased', 'relaxing.calm',
@@ -450,7 +459,7 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -460,24 +469,25 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
 def test_fetch_openml_titanic_pandas(monkeypatch):
     # dataset with strings
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40945
     expected_shape = (1309, 14)
     name_to_dtype = {
         'pclass': np.float64,
         'name': object,
-        'sex': pd.CategoricalDtype(['female', 'male']),
+        'sex': CategoricalDtype(['female', 'male']),
         'age': np.float64,
         'sibsp': np.float64,
         'parch': np.float64,
         'ticket': object,
         'fare': np.float64,
         'cabin': object,
-        'embarked': pd.CategoricalDtype(['C', 'Q', 'S']),
+        'embarked': CategoricalDtype(['C', 'Q', 'S']),
         'boat': object,
         'body': np.float64,
         'home.dest': object,
-        'survived': pd.CategoricalDtype(['0', '1'])
+        'survived': CategoricalDtype(['0', '1'])
     }
     expected_columns = ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp',
                         'parch', 'ticket', 'fare', 'cabin', 'embarked',

From b8011a6c1a2d1300b0ffcb135d228138176ff56b Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Sat, 25 May 2019 16:54:20 -0400
Subject: [PATCH 14/39] TST Adds small chunksize for testing

---
 sklearn/datasets/tests/test_openml.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 990dd08eda2b2..c8f54e1b4b031 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -283,7 +283,8 @@ def test_feature_to_dtype_error(feature):
         _feature_to_dtype(feature)
 
 
-def test_fetch_openml_iris_pandas(monkeypatch):
+@pytest.mark.parametrize('chunksize', [10, 1000])
+def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')
     CategoricalDtype = pd.api.types.CategoricalDtype
@@ -300,7 +301,8 @@ def test_fetch_openml_iris_pandas(monkeypatch):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
-    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
+    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False,
+                         chunksize=chunksize)
     df = bunch.dataframe
 
     assert isinstance(df, pd.DataFrame)

From f71aeb6bf4ea660cedba8c3360e6ca8ec006454a Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Sat, 25 May 2019 17:24:15 -0400
Subject: [PATCH 15/39] TST Uses cats directly

---
 .circleci/config.yml                  |  2 +-
 sklearn/datasets/openml.py            |  2 +-
 sklearn/datasets/tests/test_openml.py | 41 +++++++++++----------------
 3 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index f279f577a4641..46f6ca6341204 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,7 +10,7 @@ jobs:
       - PYTHON_VERSION: 3.5
       - NUMPY_VERSION: 1.11.0
       - SCIPY_VERSION: 0.17.0
-      - PANDAS_VERSION: 0.18.0
+      - PANDAS_VERSION: 0.21.0
       - MATPLOTLIB_VERSION: 1.5.1
       - SCIKIT_IMAGE_VERSION: 0.12.3
     steps:
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 5b9eea939e0b3..aadc6edd9b430 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -358,7 +358,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     for column in columns_to_keep:
         dtype = _feature_to_dtype(features_dict[column])
         if dtype == 'category':
-            dtype = pd.api.types.CategoricalDtype(attributes[column])
+            dtype = pd.CategoricalDtype(attributes[column])
         df[column] = df[column].astype(dtype, copy=False)
     return df
 
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index c8f54e1b4b031..f841d5205fe09 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -287,12 +287,11 @@ def test_feature_to_dtype_error(feature):
 def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')
-    CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 61
     expected_shape = (150, 5)
 
-    cat_dtype = CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
-                                  'Iris-virginica'])
+    cat_dtype = pd.CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
+                                     'Iris-virginica'])
     expected_dtypes = [np.float64] * 4 + [cat_dtype]
     expected_feature_names = ['sepallength', 'sepalwidth', 'petallength',
                               'petalwidth']
@@ -316,7 +315,6 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
 def test_fetch_openml_anneal_pandas(monkeypatch):
     # classification dataset with numeric and categorical columns
     pd = pytest.importorskip('pandas')
-    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 2
     target_column = 'class'
@@ -333,7 +331,7 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, CategoricalDtype)])
+                       if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -343,18 +341,17 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
 def test_fetch_openml_cpu_pandas(monkeypatch):
     # regression dataset with numeric and categorical columns
     pd = pytest.importorskip('pandas')
-    CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 561
     expected_shape = (209, 8)
 
-    cat_dtype = CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
-                                  'bti', 'burroughs', 'c.r.d', 'cdc',
-                                  'cambex', 'dec', 'dg', 'formation',
-                                  'four-phase', 'gould', 'hp', 'harris',
-                                  'honeywell', 'ibm', 'ipl', 'magnuson',
-                                  'microdata', 'nas', 'ncr', 'nixdorf',
-                                  'perkin-elmer', 'prime', 'siemens',
-                                  'sperry', 'sratus', 'wang'])
+    cat_dtype = pd.CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
+                                     'bti', 'burroughs', 'c.r.d', 'cdc',
+                                     'cambex', 'dec', 'dg', 'formation',
+                                     'four-phase', 'gould', 'hp', 'harris',
+                                     'honeywell', 'ibm', 'ipl', 'magnuson',
+                                     'microdata', 'nas', 'ncr', 'nixdorf',
+                                     'perkin-elmer', 'prime', 'siemens',
+                                     'sperry', 'sratus', 'wang'])
     expected_dtypes = [cat_dtype] + [np.float64] * 7
     expected_feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH',
                               'CHMIN', 'CHMAX']
@@ -395,7 +392,6 @@ def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
 
 def test_fetch_openml_adultcensus_pandas(monkeypatch):
     pd = pytest.importorskip('pandas')
-    CategoricalDtype = pd.api.types.CategoricalDtype
 
     # Check because of the numeric row attribute (issue #12329)
     data_id = 1119
@@ -410,7 +406,7 @@ def test_fetch_openml_adultcensus_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, CategoricalDtype)])
+                       if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -421,7 +417,6 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     # and ignore attributes. Note that data_features json has 82 attributes,
     # and row id (1), ignore attributes (3) have been removed.
     pd = pytest.importorskip('pandas')
-    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40966
     expected_shape = (7, 78)
@@ -435,7 +430,7 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, CategoricalDtype)])
+                       if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -444,7 +439,6 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
 def test_fetch_openml_emotions_pandas(monkeypatch):
     # classification dataset with multiple targets (natively)
     pd = pytest.importorskip('pandas')
-    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40589
     target_column = ['amazed.suprised', 'happy.pleased', 'relaxing.calm',
@@ -461,7 +455,7 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, CategoricalDtype)])
+                       if isinstance(dtype, pd.CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -471,25 +465,24 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
 def test_fetch_openml_titanic_pandas(monkeypatch):
     # dataset with strings
     pd = pytest.importorskip('pandas')
-    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40945
     expected_shape = (1309, 14)
     name_to_dtype = {
         'pclass': np.float64,
         'name': object,
-        'sex': CategoricalDtype(['female', 'male']),
+        'sex': pd.CategoricalDtype(['female', 'male']),
         'age': np.float64,
         'sibsp': np.float64,
         'parch': np.float64,
         'ticket': object,
         'fare': np.float64,
         'cabin': object,
-        'embarked': CategoricalDtype(['C', 'Q', 'S']),
+        'embarked': pd.CategoricalDtype(['C', 'Q', 'S']),
         'boat': object,
         'body': np.float64,
         'home.dest': object,
-        'survived': CategoricalDtype(['0', '1'])
+        'survived': pd.CategoricalDtype(['0', '1'])
     }
     expected_columns = ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp',
                         'parch', 'ticket', 'fare', 'cabin', 'embarked',

From bd912624dc4c76ac9f6afb2dbabfdfa103dac8c9 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 28 May 2019 11:45:52 -0400
Subject: [PATCH 16/39] ENH Adds support for pandas 0.23

---
 .circleci/config.yml                  |  1 -
 azure-pipelines.yml                   |  1 +
 sklearn/datasets/openml.py            |  2 +-
 sklearn/datasets/tests/test_openml.py | 41 ++++++++++++++++-----------
 4 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 46f6ca6341204..a162c05db2a81 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,7 +10,6 @@ jobs:
       - PYTHON_VERSION: 3.5
       - NUMPY_VERSION: 1.11.0
       - SCIPY_VERSION: 0.17.0
-      - PANDAS_VERSION: 0.21.0
       - MATPLOTLIB_VERSION: 1.5.1
       - SCIKIT_IMAGE_VERSION: 0.12.3
     steps:
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index c31385dd3e48d..c76a97c6664eb 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -20,6 +20,7 @@ jobs:
         INSTALL_MKL: 'false'
         NUMPY_VERSION: '1.11.0'
         SCIPY_VERSION: '0.17.0'
+        PANDAS_VERSION: '*'
         CYTHON_VERSION: '*'
         PILLOW_VERSION: '4.0.0'
         MATPLOTLIB_VERSION: '1.5.1'
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index aadc6edd9b430..5b9eea939e0b3 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -358,7 +358,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     for column in columns_to_keep:
         dtype = _feature_to_dtype(features_dict[column])
         if dtype == 'category':
-            dtype = pd.CategoricalDtype(attributes[column])
+            dtype = pd.api.types.CategoricalDtype(attributes[column])
         df[column] = df[column].astype(dtype, copy=False)
     return df
 
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index f841d5205fe09..c8f54e1b4b031 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -287,11 +287,12 @@ def test_feature_to_dtype_error(feature):
 def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 61
     expected_shape = (150, 5)
 
-    cat_dtype = pd.CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
-                                     'Iris-virginica'])
+    cat_dtype = CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
+                                  'Iris-virginica'])
     expected_dtypes = [np.float64] * 4 + [cat_dtype]
     expected_feature_names = ['sepallength', 'sepalwidth', 'petallength',
                               'petalwidth']
@@ -315,6 +316,7 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
 def test_fetch_openml_anneal_pandas(monkeypatch):
     # classification dataset with numeric and categorical columns
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 2
     target_column = 'class'
@@ -331,7 +333,7 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -341,17 +343,18 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
 def test_fetch_openml_cpu_pandas(monkeypatch):
     # regression dataset with numeric and categorical columns
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 561
     expected_shape = (209, 8)
 
-    cat_dtype = pd.CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
-                                     'bti', 'burroughs', 'c.r.d', 'cdc',
-                                     'cambex', 'dec', 'dg', 'formation',
-                                     'four-phase', 'gould', 'hp', 'harris',
-                                     'honeywell', 'ibm', 'ipl', 'magnuson',
-                                     'microdata', 'nas', 'ncr', 'nixdorf',
-                                     'perkin-elmer', 'prime', 'siemens',
-                                     'sperry', 'sratus', 'wang'])
+    cat_dtype = CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
+                                  'bti', 'burroughs', 'c.r.d', 'cdc',
+                                  'cambex', 'dec', 'dg', 'formation',
+                                  'four-phase', 'gould', 'hp', 'harris',
+                                  'honeywell', 'ibm', 'ipl', 'magnuson',
+                                  'microdata', 'nas', 'ncr', 'nixdorf',
+                                  'perkin-elmer', 'prime', 'siemens',
+                                  'sperry', 'sratus', 'wang'])
     expected_dtypes = [cat_dtype] + [np.float64] * 7
     expected_feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH',
                               'CHMIN', 'CHMAX']
@@ -392,6 +395,7 @@ def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
 
 def test_fetch_openml_adultcensus_pandas(monkeypatch):
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
 
     # Check because of the numeric row attribute (issue #12329)
     data_id = 1119
@@ -406,7 +410,7 @@ def test_fetch_openml_adultcensus_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -417,6 +421,7 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     # and ignore attributes. Note that data_features json has 82 attributes,
     # and row id (1), ignore attributes (3) have been removed.
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40966
     expected_shape = (7, 78)
@@ -430,7 +435,7 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -439,6 +444,7 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
 def test_fetch_openml_emotions_pandas(monkeypatch):
     # classification dataset with multiple targets (natively)
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40589
     target_column = ['amazed.suprised', 'happy.pleased', 'relaxing.calm',
@@ -455,7 +461,7 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
     assert isinstance(df, pd.DataFrame)
     assert df.shape == expected_shape
     n_categories = len([dtype for dtype in df.dtypes
-                       if isinstance(dtype, pd.CategoricalDtype)])
+                       if isinstance(dtype, CategoricalDtype)])
     n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
     assert expected_categories == n_categories
     assert expected_floats == n_floats
@@ -465,24 +471,25 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
 def test_fetch_openml_titanic_pandas(monkeypatch):
     # dataset with strings
     pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40945
     expected_shape = (1309, 14)
     name_to_dtype = {
         'pclass': np.float64,
         'name': object,
-        'sex': pd.CategoricalDtype(['female', 'male']),
+        'sex': CategoricalDtype(['female', 'male']),
         'age': np.float64,
         'sibsp': np.float64,
         'parch': np.float64,
         'ticket': object,
         'fare': np.float64,
         'cabin': object,
-        'embarked': pd.CategoricalDtype(['C', 'Q', 'S']),
+        'embarked': CategoricalDtype(['C', 'Q', 'S']),
         'boat': object,
         'body': np.float64,
         'home.dest': object,
-        'survived': pd.CategoricalDtype(['0', '1'])
+        'survived': CategoricalDtype(['0', '1'])
     }
     expected_columns = ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp',
                         'parch', 'ticket', 'fare', 'cabin', 'embarked',

From 22a76ff974346803018af05dc5c75c2d14c63828 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 28 May 2019 12:06:00 -0400
Subject: [PATCH 17/39] DOC Better comments

---
 sklearn/datasets/openml.py | 61 +++++++++++---------------------------
 1 file changed, 17 insertions(+), 44 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 5b9eea939e0b3..ff1a50766bec9 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -8,6 +8,7 @@
 from functools import wraps
 import itertools
 from collections.abc import Generator
+from itertools import islice
 from itertools import zip_longest
 from collections import OrderedDict
 
@@ -283,39 +284,15 @@ def _feature_to_dtype(feature):
     raise ValueError('Unsupported feature: {}'.format(feature))
 
 
-def _chunk_iterable(seq, chunksize):
-    """Chunk ``seq`` into tuples of length ``chunksize``. The last chunk may
-    have a length less than ``chunksize``."""
-
-    pad_value = '__PADDING__'
-
-    args = [iter(seq)] * chunksize
-    it = zip_longest(*args, fillvalue=pad_value)
-    try:
-        prev = next(it)
-    except StopIteration:
-        # Nothing to iterate
-        return
-
-    # yield everything except the final value
-    for item in it:
-        yield prev
-        prev = item
-
-    # handle final value
-    if prev[-1] is pad_value:
-        # uses binary search to find the final index
-        lo, hi = 0, chunksize
-        while lo < hi:
-            mid = (lo + hi) // 2
-            if prev[mid] is pad_value:
-                hi = mid
-            else:
-                lo = mid + 1
-        yield prev[:lo]
-    else:
-        # no padding needed
-        yield prev
+def _chunk_generator(gen, chunksize):
+    """Chunk generator, ``gen`` into tuples of length ``chunksize``. The last
+    chunk may have a length less than ``chunksize``."""
+    while True:
+        chunk = tuple(islice(gen, chunksize))
+        if chunk:
+            yield chunk
+        else:
+            return
 
 
 def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
@@ -344,7 +321,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     attributes = OrderedDict(arrf['attributes'])
     arrf_columns = list(attributes)
 
-    arrf_data_gen = _chunk_iterable(arrf['data'], chunksize)
+    arrf_data_gen = _chunk_generator(arrf['data'], chunksize)
     dfs = [pd.DataFrame(list(data), columns=arrf_columns)
            for data in arrf_data_gen]
     df = pd.concat(dfs)
@@ -595,7 +572,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         DataFrame.
 
     chunksize : int, default=5000
-        Number of rows to read at a time when constructing a dataframe.
+        Number of rows of arrf file to read at a time. Higher values leads to
+        more memory usage.
         Only used when ``return_frame`` is True.
 
     Returns
@@ -604,17 +582,12 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     data : Bunch
         Dictionary-like object, with attributes:
 
-        data : np.array, scipy.sparse.csr_matrix of floats, or None
+        data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame
             The feature matrix. Categorical features are encoded as ordinals.
-            If ``return_frame`` is True, this is None.
-        target : np.array or None
+        target : np.array, pandas Series or DataFrame
             The regression target or classification labels, if applicable.
-            Dtype is float if numeric, and object if categorical.
-            If ``return_frame`` is True, this is None.
-        dataframe : pandas DataFrame
-            The pandas DataFrame that includes the data and the target.
-            Use ``feature_names`` and ``target_names`` to seperate the target
-            from the features. If ``return_frame`` is False, this is None.
+            Dtype is float if numeric, and object if categorical. If
+            ``return_frame`` is True, ``target`` is a pandas object.
         DESCR : str
             The full description of the dataset
         feature_names : list

From 1712492866ef3abfe8ed201bd5dd0ca1c72265dd Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 28 May 2019 12:40:18 -0400
Subject: [PATCH 18/39] CLN Minor

---
 sklearn/datasets/openml.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index ff1a50766bec9..dab5a9556f081 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -9,7 +9,6 @@
 import itertools
 from collections.abc import Generator
 from itertools import islice
-from itertools import zip_longest
 from collections import OrderedDict
 
 from urllib.request import urlopen, Request
@@ -285,10 +284,10 @@ def _feature_to_dtype(feature):
 
 
 def _chunk_generator(gen, chunksize):
-    """Chunk generator, ``gen`` into tuples of length ``chunksize``. The last
+    """Chunk generator, ``gen`` into lists of length ``chunksize``. The last
     chunk may have a length less than ``chunksize``."""
     while True:
-        chunk = tuple(islice(gen, chunksize))
+        chunk = list(islice(gen, chunksize))
         if chunk:
             yield chunk
         else:
@@ -322,7 +321,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     arrf_columns = list(attributes)
 
     arrf_data_gen = _chunk_generator(arrf['data'], chunksize)
-    dfs = [pd.DataFrame(list(data), columns=arrf_columns)
+    dfs = [pd.DataFrame(data, columns=arrf_columns)
            for data in arrf_data_gen]
     df = pd.concat(dfs)
 

From 58c5c2db7edd85a5fac6285846333c61919c5f84 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Wed, 29 May 2019 21:20:02 -0400
Subject: [PATCH 19/39] WIP

---
 sklearn/datasets/openml.py            |  30 ++-
 sklearn/datasets/tests/test_openml.py | 319 ++++++++++++++++++--------
 2 files changed, 234 insertions(+), 115 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index dab5a9556f081..cbf738410d6af 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -315,7 +315,7 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     -------
     dataframe : pandas DataFrame
     """
-    pd = check_pandas_support('fetch_openml with return_frame=True')
+    pd = check_pandas_support('fetch_openml with as_frame=True')
 
     attributes = OrderedDict(arrf['attributes'])
     arrf_columns = list(attributes)
@@ -513,7 +513,7 @@ def _valid_data_column_names(features_list, target_columns):
 
 def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                  target_column='default-target', cache=True, return_X_y=False,
-                 return_frame=False, chunksize=5000):
+                 as_frame=False, chunksize=5000):
     """Fetch dataset from openml by name or dataset id.
 
     Datasets are uniquely identified by either an integer ID or by a
@@ -566,14 +566,14 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         If True, returns ``(data, target)`` instead of a Bunch object. See
         below for more information about the `data` and `target` objects.
 
-    return_frame : boolean, default=False
+    as_frame : boolean, default=False
         If True, returns a Bunch where the data attribute is a pandas
         DataFrame.
 
     chunksize : int, default=5000
         Number of rows of arrf file to read at a time. Higher values leads to
         more memory usage.
-        Only used when ``return_frame`` is True.
+        Only used when ``as_frame`` is True.
 
     Returns
     -------
@@ -586,16 +586,14 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         target : np.array, pandas Series or DataFrame
             The regression target or classification labels, if applicable.
             Dtype is float if numeric, and object if categorical. If
-            ``return_frame`` is True, ``target`` is a pandas object.
+            ``as_frame`` is True, ``target`` is a pandas object.
         DESCR : str
             The full description of the dataset
         feature_names : list
             The names of the dataset columns
-        target_names : list
-            The names of the target columns
         categories : dict or None
             Maps each categorical feature name to a list of values, such
-            that the value encoded as i is ith in the list. If ``return_frame``
+            that the value encoded as i is ith in the list. If ``as_frame``
             is True, this is None.
         details : dict
             More metadata from OpenML
@@ -662,24 +660,24 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     if data_description['format'].lower() == 'sparse_arff':
         return_sparse = True
 
-    if return_frame:
+    if as_frame:
         if return_sparse:
             raise ValueError('Cannot return dataframe with sparse data')
         if return_X_y:
             raise ValueError('return_X_y=True can not be set when '
-                             'return_frame=True')
+                             'as_frame=True')
 
     # download data features, meta-info about column types
     features_list = _get_data_features(data_id, data_home)
 
-    if not return_frame:
+    if not as_frame:
         for feature in features_list:
             if 'true' in (feature['is_ignore'], feature['is_row_identifier']):
                 continue
             if feature['data_type'] == 'string':
                 raise ValueError('STRING attributes are not supported for '
                                  'arrays as a return value. Try '
-                                 'return_frame=True')
+                                 'as_frame=True')
 
     if target_column == "default-target":
         # determines the default target based on the data feature results
@@ -731,19 +729,18 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     # obtain the data
     arff = _download_data_arff(data_description['file_id'], return_sparse,
-                               data_home, encode_nominal=not return_frame)
+                               data_home, encode_nominal=not as_frame)
 
     description = "{}\n\nDownloaded from openml.org.".format(
         data_description.pop('description'))
 
-    if return_frame:
+    if as_frame:
         columns = data_columns + target_column
         df = _convert_arff_data_dataframe(arff, columns, features_dict,
                                           chunksize)
 
         return Bunch(dataframe=df, data=None, target=None,
-                     feature_names=data_columns,
-                     target_names=target_column, DESCR=description,
+                     feature_names=data_columns, DESCR=description,
                      details=data_description, categories=None,
                      url="https://www.openml.org/d/{}".format(data_id))
 
@@ -782,7 +779,6 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     bunch = Bunch(
         data=X, target=y, feature_names=data_columns,
-        target_names=target_column,
         DESCR=description, details=data_description,
         categories=nominal_attributes,
         url="https://www.openml.org/d/{}".format(data_id))
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index c8f54e1b4b031..07549c71f60f2 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -289,28 +289,80 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     pd = pytest.importorskip('pandas')
     CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 61
-    expected_shape = (150, 5)
+    data_shape = (150, 4)
+    target_shape = (150, )
+    frame_shape = (150, 5)
 
-    cat_dtype = CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
-                                  'Iris-virginica'])
-    expected_dtypes = [np.float64] * 4 + [cat_dtype]
-    expected_feature_names = ['sepallength', 'sepalwidth', 'petallength',
-                              'petalwidth']
-    expected_target_names = ['class']
-    expected_columns = expected_feature_names + expected_target_names
+    target_dtype = CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
+                                     'Iris-virginica'])
+    data_dtypes = [np.float64] * 4
+    data_names = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
+    target_names = 'class'
+    columns = data_names + [target_names]
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
-    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False,
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False,
                          chunksize=chunksize)
-    df = bunch.dataframe
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
 
-    assert isinstance(df, pd.DataFrame)
-    assert np.all(df.dtypes == expected_dtypes)
-    assert df.shape == expected_shape
-    assert np.all(df.columns == expected_columns)
-    assert np.all(bunch.feature_names == expected_feature_names)
-    assert np.all(bunch.target_names == expected_target_names)
+    assert isinstance(data, pd.DataFrame)
+    assert np.all(data.dtypes == data_dtypes)
+    assert data.shape == data_shape
+    assert np.all(data.columns == columns)
+    assert np.all(bunch.feature_names == data_names)
+
+    assert isinstance(target, pd.Series)
+    assert target.dtype == target_dtype
+    assert target.shape == target_shape
+
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
+    assert np.all(frame.dtype == data_dtypes + [target_dtype])
+
+
+@pytest.mark.parametrize('chunksize', [10, 1000])
+def test_fetch_openml_iris_multitarget_pandas(monkeypatch, chunksize):
+    # classification dataset with numeric only columns
+    pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
+    data_id = 61
+    data_shape = (150, 3)
+    target_shape = (150, 2)
+    frame_shape = (150, 5)
+    target_column = ['petalwidth', 'petallength']
+
+    target_dtype = [CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
+                                     'Iris-virginica']), np.float64]
+    data_dtypes = [np.float64] * 3
+    data_names = ['sepallength', 'sepalwidth', 'class']
+    target_names = ['petalwidth', 'petallength']
+    columns = data_names + target_names
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False,
+                         chunksize=chunksize, target_column=target_column)
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
+
+    assert isinstance(data, pd.DataFrame)
+    assert np.all(data.dtypes == data_dtypes)
+    assert data.shape == data_shape
+    assert np.all(data.columns == columns)
+    assert np.all(bunch.feature_names == data_names)
+
+    assert isinstance(target, pd.DataFrame)
+    assert np.all(target.dtypes == target_dtype)
+    assert target.shape == target_shape
+    assert np.all(target.columns == target_column)
+
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
+    assert np.all(frame.dtype == data_dtypes + target_dtype)
 
 
 def test_fetch_openml_anneal_pandas(monkeypatch):
@@ -320,24 +372,34 @@ def test_fetch_openml_anneal_pandas(monkeypatch):
 
     data_id = 2
     target_column = 'class'
-    expected_shape = (11, 39)
-    expected_categories = 33
-    expected_floats = 6
+    data_shape = (11, 38)
+    target_shape = (11,)
+    frame_shape = (11, 39)
+    expected_data_categories = 32
+    expected_data_floats = 6
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
-    bunch = fetch_openml(data_id=data_id, return_frame=True,
+    bunch = fetch_openml(data_id=data_id, as_frame=True,
                          target_column=target_column, cache=False)
-    df = bunch.dataframe
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
 
-    assert isinstance(df, pd.DataFrame)
-    assert df.shape == expected_shape
-    n_categories = len([dtype for dtype in df.dtypes
+    assert isinstance(data, pd.DataFrame)
+    assert data.shape == data_shape
+    n_categories = len([dtype for dtype in data.dtypes
                        if isinstance(dtype, CategoricalDtype)])
-    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
-    assert expected_categories == n_categories
-    assert expected_floats == n_floats
-    assert np.all(bunch.target_names == [target_column])
+    n_floats = len([dtype for dtype in data.dtypes if dtype.kind == 'f'])
+    assert expected_data_categories == n_categories
+    assert expected_data_floats == n_floats
+
+    assert isinstance(target, pd.Series)
+    assert target.shape == target_shape
+    assert isinstance(target.dtype, CategoricalDtype)
+
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
 
 
 def test_fetch_openml_cpu_pandas(monkeypatch):
@@ -345,7 +407,9 @@ def test_fetch_openml_cpu_pandas(monkeypatch):
     pd = pytest.importorskip('pandas')
     CategoricalDtype = pd.api.types.CategoricalDtype
     data_id = 561
-    expected_shape = (209, 8)
+    data_shape = (209, 7)
+    target_shape = (209, )
+    frame_shape = (209, 8)
 
     cat_dtype = CategoricalDtype(['adviser', 'amdahl', 'apollo', 'basf',
                                   'bti', 'burroughs', 'c.r.d', 'cdc',
@@ -355,21 +419,30 @@ def test_fetch_openml_cpu_pandas(monkeypatch):
                                   'microdata', 'nas', 'ncr', 'nixdorf',
                                   'perkin-elmer', 'prime', 'siemens',
                                   'sperry', 'sratus', 'wang'])
-    expected_dtypes = [cat_dtype] + [np.float64] * 7
-    expected_feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH',
-                              'CHMIN', 'CHMAX']
-    expected_target_names = ['class']
+    data_dtypes = [cat_dtype] + [np.float64] * 6
+    feature_names = ['vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH',
+                     'CHMIN', 'CHMAX']
+    target_name = 'class'
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
-    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.dataframe
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False)
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
 
-    assert isinstance(df, pd.DataFrame)
-    assert df.shape == expected_shape
-    assert np.all(df.dtypes == expected_dtypes)
-    assert np.all(df.columns == expected_feature_names + expected_target_names)
-    assert np.all(bunch.feature_names == expected_feature_names)
-    assert np.all(bunch.target_names == expected_target_names)
+    assert isinstance(data, pd.DataFrame)
+    assert data.shape == data_shape
+    assert np.all(data.dtypes == data_dtypes)
+    assert np.all(data.columns == feature_names)
+    assert np.all(bunch.feature_names == feature_names)
+
+    assert isinstance(target, pd.Series)
+    assert target.shape == target_shape
+    assert target.dtype == np.float64
+    assert target.name == target_name
+
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
 
 
 def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
@@ -379,7 +452,7 @@ def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
 
     msg = 'Cannot return dataframe with sparse data'
     with pytest.raises(ValueError, match=msg):
-        fetch_openml(data_id=data_id, return_frame=True, cache=False)
+        fetch_openml(data_id=data_id, as_frame=True, cache=False)
 
 
 def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
@@ -387,9 +460,9 @@ def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
-    msg = 'return_X_y=True can not be set when return_frame=True'
+    msg = 'return_X_y=True can not be set when as_frame=True'
     with pytest.raises(ValueError, match=msg):
-        fetch_openml(data_id=data_id, return_frame=True, cache=False,
+        fetch_openml(data_id=data_id, as_frame=True, cache=False,
                      return_X_y=True)
 
 
@@ -399,21 +472,34 @@ def test_fetch_openml_adultcensus_pandas(monkeypatch):
 
     # Check because of the numeric row attribute (issue #12329)
     data_id = 1119
-    expected_shape = (10, 15)
-    expected_categories = 9
-    expected_floats = 6
+    data_shape = (10, 14)
+    target_shape = (10, )
+    frame_shape = (10, 15)
 
-    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
-    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.dataframe
+    expected_data_categories = 8
+    expected_data_floats = 6
+    target_column = 'class'
 
-    assert isinstance(df, pd.DataFrame)
-    assert df.shape == expected_shape
-    n_categories = len([dtype for dtype in df.dtypes
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False)
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
+
+    assert isinstance(data, pd.DataFrame)
+    assert data.shape == data_shape
+    n_categories = len([dtype for dtype in data.dtypes
                        if isinstance(dtype, CategoricalDtype)])
-    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
-    assert expected_categories == n_categories
-    assert expected_floats == n_floats
+    n_floats = len([dtype for dtype in data.dtypes if dtype.kind == 'f'])
+    assert expected_data_categories == n_categories
+    assert expected_data_floats == n_floats
+
+    assert isinstance(target, pd.Series)
+    assert target.shape == target_shape
+    assert target.name == target_column
+
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
 
 
 def test_fetch_openml_miceprotein_pandas(monkeypatch):
@@ -424,21 +510,36 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
     CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40966
-    expected_shape = (7, 78)
-    expected_floats = 77
-    expected_categories = 1
+    data_shape = (7, 77)
+    target_shape = (7, )
+    frame_shape = (7, 78)
 
-    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
-    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.dataframe
+    target_column = 'class'
+    frame_n_categories = 1
+    frame_n_floats = 77
 
-    assert isinstance(df, pd.DataFrame)
-    assert df.shape == expected_shape
-    n_categories = len([dtype for dtype in df.dtypes
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False)
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
+
+    assert isinstance(data, pd.DataFrame)
+    assert data.shape == data_shape
+    assert np.all(data.dtypes == np.float64)
+
+    assert isinstance(target, pd.Series)
+    assert isinstance(target.dtype, CategoricalDtype)
+    assert target.shape == target_shape
+    assert target.name == target_column
+
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
+    n_categories = len([dtype for dtype in data.dtypes
                        if isinstance(dtype, CategoricalDtype)])
-    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
-    assert expected_categories == n_categories
-    assert expected_floats == n_floats
+    n_floats = len([dtype for dtype in data.dtypes if dtype.kind == 'f'])
+    assert frame_n_categories == n_categories
+    assert frame_n_floats == n_floats
 
 
 def test_fetch_openml_emotions_pandas(monkeypatch):
@@ -449,23 +550,34 @@ def test_fetch_openml_emotions_pandas(monkeypatch):
     data_id = 40589
     target_column = ['amazed.suprised', 'happy.pleased', 'relaxing.calm',
                      'quiet.still', 'sad.lonely', 'angry.aggresive']
-    expected_shape = (13, 78)
-    expected_categories = 6
-    expected_floats = 72
+    data_shape = (13, 72)
+    target_shape = (13, 6)
+    frame_shape = (13, 78)
+
+    expected_frame_categories = 6
+    expected_frame_floats = 72
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
-    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False,
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False,
                          target_column=target_column)
-    df = bunch.dataframe
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
+
+    assert isinstance(data, pd.DataFrame)
+    assert data.shape == data_shape
 
-    assert isinstance(df, pd.DataFrame)
-    assert df.shape == expected_shape
-    n_categories = len([dtype for dtype in df.dtypes
+    assert isinstance(target, pd.DataFrame)
+    assert target.shape == target_shape
+    assert np.all(target.columns == target_column)
+
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
+    n_categories = len([dtype for dtype in frame.dtypes
                        if isinstance(dtype, CategoricalDtype)])
-    n_floats = len([dtype for dtype in df.dtypes if dtype.kind == 'f'])
-    assert expected_categories == n_categories
-    assert expected_floats == n_floats
-    assert np.all(bunch.target_names == target_column)
+    n_floats = len([dtype for dtype in frame.dtypes if dtype.kind == 'f'])
+    assert expected_frame_categories == n_categories
+    assert expected_frame_floats == n_floats
 
 
 def test_fetch_openml_titanic_pandas(monkeypatch):
@@ -474,7 +586,9 @@ def test_fetch_openml_titanic_pandas(monkeypatch):
     CategoricalDtype = pd.api.types.CategoricalDtype
 
     data_id = 40945
-    expected_shape = (1309, 14)
+    data_shape = (1309, 13)
+    target_shape = (1309, )
+    frame_shape = (1309, 14)
     name_to_dtype = {
         'pclass': np.float64,
         'name': object,
@@ -491,25 +605,34 @@ def test_fetch_openml_titanic_pandas(monkeypatch):
         'home.dest': object,
         'survived': CategoricalDtype(['0', '1'])
     }
-    expected_columns = ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp',
-                        'parch', 'ticket', 'fare', 'cabin', 'embarked',
-                        'boat', 'body', 'home.dest']
-    expected_dtypes = [name_to_dtype[col] for col in expected_columns]
-    expected_feature_names = ['pclass', 'name', 'sex', 'age', 'sibsp',
-                              'parch', 'ticket', 'fare', 'cabin', 'embarked',
-                              'boat', 'body', 'home.dest']
-    expected_target_names = ['survived']
+
+    frame_columns = ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp',
+                     'parch', 'ticket', 'fare', 'cabin', 'embarked',
+                     'boat', 'body', 'home.dest']
+    frame_dtypes = [name_to_dtype[col] for col in frame_columns]
+    feature_names = ['pclass', 'name', 'sex', 'age', 'sibsp',
+                     'parch', 'ticket', 'fare', 'cabin', 'embarked',
+                     'boat', 'body', 'home.dest']
+    target_name = 'survived'
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
-    bunch = fetch_openml(data_id=data_id, return_frame=True, cache=False)
-    df = bunch.dataframe
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False)
+    data = bunch.data
+    target = bunch.target
+    frame = bunch.frame
+
+    assert isinstance(data, pd.DataFrame)
+    assert data.shape == data_shape
+    assert np.all(data.columns == feature_names)
+
+    assert isinstance(target, pd.Series)
+    assert target.shape == target_shape
+    assert target.name == target_name
+    assert target.dtype == name_to_dtype[target_name]
 
-    assert isinstance(df, pd.DataFrame)
-    assert df.shape == expected_shape
-    assert np.all(df.dtypes == expected_dtypes)
-    assert np.all(df.columns == expected_columns)
-    assert np.all(bunch.feature_names == expected_feature_names)
-    assert np.all(bunch.target_names == expected_target_names)
+    assert isinstance(frame, pd.DataFrame)
+    assert frame.shape == frame_shape
+    assert np.all(data.dtypes == frame_dtypes)
 
 
 @pytest.mark.parametrize('gzip_response', [True, False])
@@ -924,7 +1047,7 @@ def test_string_attribute_without_dataframe(monkeypatch, gzip_response):
     # single column test
     assert_raise_message(ValueError,
                          ('STRING attributes are not supported for arrays as '
-                          'a return value. Try return_frame=True'),
+                          'a return value. Try as_frame=True'),
                          fetch_openml, data_id=data_id, cache=False)
 
 
From 8780000ab0e903c093ad44f03944c0113610b821 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 4 Jun 2019 14:53:04 -0400
Subject: [PATCH 20/39] ENH Return data and target

---
 sklearn/datasets/openml.py            | 10 ++++++++--
 sklearn/datasets/tests/test_openml.py | 27 +++++++++++++--------------
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index cbf738410d6af..65e0a0bdf6a6d 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -738,8 +738,14 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         columns = data_columns + target_column
         df = _convert_arff_data_dataframe(arff, columns, features_dict,
                                           chunksize)
-
-        return Bunch(dataframe=df, data=None, target=None,
+        X = df[data_columns]
+        if len(target_column) >= 2:
+            y = df[target_column]
+        elif len(target_column) == 1:
+            y = df[target_column[0]]
+        else:
+            y = None
+        return Bunch(frame=df, data=X, target=y,
                      feature_names=data_columns, DESCR=description,
                      details=data_description, categories=None,
                      url="https://www.openml.org/d/{}".format(data_id))
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 07549c71f60f2..f13a223e1033c 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -298,7 +298,6 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     data_dtypes = [np.float64] * 4
     data_names = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
     target_names = 'class'
-    columns = data_names + [target_names]
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
@@ -311,7 +310,7 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     assert isinstance(data, pd.DataFrame)
     assert np.all(data.dtypes == data_dtypes)
     assert data.shape == data_shape
-    assert np.all(data.columns == columns)
+    assert np.all(data.columns == data_names)
     assert np.all(bunch.feature_names == data_names)
 
     assert isinstance(target, pd.Series)
@@ -320,7 +319,7 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
 
     assert isinstance(frame, pd.DataFrame)
     assert frame.shape == frame_shape
-    assert np.all(frame.dtype == data_dtypes + [target_dtype])
+    assert np.all(frame.dtypes == data_dtypes + [target_dtype])
 
 
 @pytest.mark.parametrize('chunksize', [10, 1000])
@@ -334,12 +333,12 @@ def test_fetch_openml_iris_multitarget_pandas(monkeypatch, chunksize):
     frame_shape = (150, 5)
     target_column = ['petalwidth', 'petallength']
 
-    target_dtype = [CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
-                                     'Iris-virginica']), np.float64]
-    data_dtypes = [np.float64] * 3
+    cat_dtype = CategoricalDtype(['Iris-setosa', 'Iris-versicolor',
+                                  'Iris-virginica'])
+    data_dtypes = [np.float64, np.float64] + [cat_dtype]
     data_names = ['sepallength', 'sepalwidth', 'class']
+    target_dtypes = [np.float64, np.float64]
     target_names = ['petalwidth', 'petallength']
-    columns = data_names + target_names
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
@@ -352,17 +351,17 @@ def test_fetch_openml_iris_multitarget_pandas(monkeypatch, chunksize):
     assert isinstance(data, pd.DataFrame)
     assert np.all(data.dtypes == data_dtypes)
     assert data.shape == data_shape
-    assert np.all(data.columns == columns)
+    assert np.all(data.columns == data_names)
     assert np.all(bunch.feature_names == data_names)
 
     assert isinstance(target, pd.DataFrame)
-    assert np.all(target.dtypes == target_dtype)
+    assert np.all(target.dtypes == target_dtypes)
     assert target.shape == target_shape
-    assert np.all(target.columns == target_column)
+    assert np.all(target.columns == target_names)
 
     assert isinstance(frame, pd.DataFrame)
     assert frame.shape == frame_shape
-    assert np.all(frame.dtype == data_dtypes + target_dtype)
+    assert np.all(frame.dtypes == [np.float64] * 4 + [cat_dtype])
 
 
 def test_fetch_openml_anneal_pandas(monkeypatch):
@@ -535,9 +534,9 @@ def test_fetch_openml_miceprotein_pandas(monkeypatch):
 
     assert isinstance(frame, pd.DataFrame)
     assert frame.shape == frame_shape
-    n_categories = len([dtype for dtype in data.dtypes
+    n_categories = len([dtype for dtype in frame.dtypes
                        if isinstance(dtype, CategoricalDtype)])
-    n_floats = len([dtype for dtype in data.dtypes if dtype.kind == 'f'])
+    n_floats = len([dtype for dtype in frame.dtypes if dtype.kind == 'f'])
     assert frame_n_categories == n_categories
     assert frame_n_floats == n_floats
 
@@ -632,7 +631,7 @@ def test_fetch_openml_titanic_pandas(monkeypatch):
 
     assert isinstance(frame, pd.DataFrame)
     assert frame.shape == frame_shape
-    assert np.all(data.dtypes == frame_dtypes)
+    assert np.all(frame.dtypes == frame_dtypes)
 
 
 @pytest.mark.parametrize('gzip_response', [True, False])

From a7519cd30a291e518892093209406c530337f532 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 4 Jun 2019 14:57:23 -0400
Subject: [PATCH 21/39] ENH Adds support for return_X_y

---
 sklearn/datasets/openml.py            |  5 ++---
 sklearn/datasets/tests/test_openml.py | 28 ++++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 65e0a0bdf6a6d..0699d040bc241 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -663,9 +663,6 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     if as_frame:
         if return_sparse:
             raise ValueError('Cannot return dataframe with sparse data')
-        if return_X_y:
-            raise ValueError('return_X_y=True can not be set when '
-                             'as_frame=True')
 
     # download data features, meta-info about column types
     features_list = _get_data_features(data_id, data_home)
@@ -745,6 +742,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
             y = df[target_column[0]]
         else:
             y = None
+        if return_X_y:
+            return X, y
         return Bunch(frame=df, data=X, target=y,
                      feature_names=data_columns, DESCR=description,
                      details=data_description, categories=None,
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index f13a223e1033c..4760bfc7ef336 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -454,15 +454,33 @@ def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
         fetch_openml(data_id=data_id, as_frame=True, cache=False)
 
 
-def test_fetch_openml_adultcensus_pandas_return_X_y_errors(monkeypatch):
+def test_fetch_openml_adultcensus_pandas_return_X_y(monkeypatch):
+    pd = pytest.importorskip('pandas')
+    CategoricalDtype = pd.api.types.CategoricalDtype
+
+    # Check because of the numeric row attribute (issue #12329)
     data_id = 1119
+    data_shape = (10, 14)
+    target_shape = (10, )
+
+    expected_data_categories = 8
+    expected_data_floats = 6
+    target_column = 'class'
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+    X, y = fetch_openml(data_id=data_id, as_frame=True, cache=False,
+                        return_X_y=True)
+    assert isinstance(X, pd.DataFrame)
+    assert X.shape == data_shape
+    n_categories = len([dtype for dtype in X.dtypes
+                       if isinstance(dtype, CategoricalDtype)])
+    n_floats = len([dtype for dtype in X.dtypes if dtype.kind == 'f'])
+    assert expected_data_categories == n_categories
+    assert expected_data_floats == n_floats
 
-    msg = 'return_X_y=True can not be set when as_frame=True'
-    with pytest.raises(ValueError, match=msg):
-        fetch_openml(data_id=data_id, as_frame=True, cache=False,
-                     return_X_y=True)
+    assert isinstance(y, pd.Series)
+    assert y.shape == target_shape
+    assert y.name == target_column
 
 
 def test_fetch_openml_adultcensus_pandas(monkeypatch):

From 5396d8da6ac568cac27aedd9758cd7d98c3e9ae5 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 4 Jun 2019 14:58:28 -0400
Subject: [PATCH 22/39] ENH Update example

---
 examples/compose/plot_column_transformer_mixed_types.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 87a71e51f822b..162d31832417c 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -37,8 +37,9 @@
 np.random.seed(0)
 
 # Read data from Titanic dataset.
-titantic = fetch_openml(data_id=40945, return_frame=True)
-data = titantic.dataframe
+titantic = fetch_openml(data_id=40945, as_frame=True)
+X = titantic.data
+y = titantic.target
 
 # We will train our classifier with the following features:
 # Numeric Features:
@@ -70,9 +71,6 @@
 clf = Pipeline(steps=[('preprocessor', preprocessor),
                       ('classifier', LogisticRegression())])
 
-X = data.drop('survived', axis=1)
-y = data['survived']
-
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
 clf.fit(X_train, y_train)

From b33fcf9c27e4201902711f51baac2b881781de35 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 4 Jun 2019 14:59:44 -0400
Subject: [PATCH 23/39] STY Lint

---
 sklearn/datasets/tests/test_openml.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 4760bfc7ef336..b1796ac2e4f25 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -297,7 +297,7 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
                                      'Iris-virginica'])
     data_dtypes = [np.float64] * 4
     data_names = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
-    target_names = 'class'
+    target_name = 'class'
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
@@ -316,6 +316,7 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     assert isinstance(target, pd.Series)
     assert target.dtype == target_dtype
     assert target.shape == target_shape
+    assert target.name == target_name
 
     assert isinstance(frame, pd.DataFrame)
     assert frame.shape == frame_shape

From 706a2545e6fd46f26ad1356e718c83eac8003b79 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 4 Jun 2019 15:41:18 -0400
Subject: [PATCH 24/39] BUG Removes target_column

---
 sklearn/datasets/tests/test_openml.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index b1796ac2e4f25..e945920a5801a 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -94,12 +94,10 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version,
     if isinstance(target_column, str):
         # single target, so target is vector
         assert data_by_id.target.shape == (expected_observations, )
-        assert data_by_id.target_names[0] == target_column
     elif isinstance(target_column, list):
         # multi target, so target is array
         assert data_by_id.target.shape == (expected_observations,
                                            len(target_column))
-        assert np.all(data_by_id.target_names == target_column)
     assert data_by_id.data.dtype == np.float64
     assert data_by_id.target.dtype == expected_target_dtype
     assert len(data_by_id.feature_names) == expected_features

From 7a7de894e6754f3f9b71b7c9fe4e01fd64433e1b Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 4 Jun 2019 21:59:20 -0400
Subject: [PATCH 25/39] BLD Trigger CI


From 9568d4cbfc310f837c3000227a985de0621a7537 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Wed, 5 Jun 2019 10:52:10 -0400
Subject: [PATCH 26/39] ENH Uses working_memory to calculate chunksize

---
 sklearn/datasets/openml.py            | 40 ++++++++++++++++-----------
 sklearn/datasets/tests/test_openml.py | 25 +++++++++++------
 2 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 0699d040bc241..9b93a9dd32e40 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -10,6 +10,7 @@
 from collections.abc import Generator
 from itertools import islice
 from collections import OrderedDict
+import warnings
 
 from urllib.request import urlopen, Request
 
@@ -20,6 +21,7 @@
 from .base import get_data_home
 from urllib.error import HTTPError
 from ..utils import Bunch
+from .. import get_config
 from ..utils import check_pandas_support  # noqa
 
 __all__ = ['fetch_openml']
@@ -294,7 +296,7 @@ def _chunk_generator(gen, chunksize):
             return
 
 
-def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
+def _convert_arff_data_dataframe(arrf, columns, features_dict):
     """Convert the ARFF object into a pandas DataFrame.
 
     Parameters
@@ -308,9 +310,6 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     features_dict : dict
         Maps feature name to feature info from openml.
 
-    chunksize : int
-        Number of rows to read at a time.
-
     Returns
     -------
     dataframe : pandas DataFrame
@@ -320,9 +319,25 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict, chunksize):
     attributes = OrderedDict(arrf['attributes'])
     arrf_columns = list(attributes)
 
-    arrf_data_gen = _chunk_generator(arrf['data'], chunksize)
-    dfs = [pd.DataFrame(data, columns=arrf_columns)
-           for data in arrf_data_gen]
+    # calculate chunksize
+    working_memory = get_config()['working_memory']
+    first_row = next(arrf['data'])
+    first_df = pd.DataFrame([first_row], columns=arrf_columns)
+
+    row_bytes = first_df.memory_usage(deep=True).sum()
+    chunksize = int(working_memory * (2 ** 20) // row_bytes)
+
+    if chunksize < 1:
+        warnings.warn('Could not adhere to working_memory config. '
+                      'Currently %.0fMiB, %.0fMiB required.' %
+                      (working_memory, np.ceil(row_bytes * 2 ** -20)))
+        chunksize = 1
+
+    # read arrf data with chunks
+    dfs = []
+    dfs.append(first_df)
+    for data in _chunk_generator(arrf['data'], chunksize):
+        dfs.append(pd.DataFrame(data, columns=arrf_columns))
     df = pd.concat(dfs)
 
     columns_to_keep = [col for col in arrf_columns if col in columns]
@@ -570,11 +585,6 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         If True, returns a Bunch where the data attribute is a pandas
         DataFrame.
 
-    chunksize : int, default=5000
-        Number of rows of arrf file to read at a time. Higher values leads to
-        more memory usage.
-        Only used when ``as_frame`` is True.
-
     Returns
     -------
 
@@ -673,8 +683,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                 continue
             if feature['data_type'] == 'string':
                 raise ValueError('STRING attributes are not supported for '
-                                 'arrays as a return value. Try '
-                                 'as_frame=True')
+                                 'array representation. Try as_frame=True')
 
     if target_column == "default-target":
         # determines the default target based on the data feature results
@@ -733,8 +742,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
 
     if as_frame:
         columns = data_columns + target_column
-        df = _convert_arff_data_dataframe(arff, columns, features_dict,
-                                          chunksize)
+        df = _convert_arff_data_dataframe(arff, columns, features_dict)
         X = df[data_columns]
         if len(target_column) >= 2:
             y = df[target_column]
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index e945920a5801a..4e8db429e5d64 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -9,6 +9,7 @@
 import sklearn
 import pytest
 
+from sklearn import config_context
 from sklearn.datasets import fetch_openml
 from sklearn.datasets.openml import (_open_openml_url,
                                      _get_data_description_by_id,
@@ -281,8 +282,7 @@ def test_feature_to_dtype_error(feature):
         _feature_to_dtype(feature)
 
 
-@pytest.mark.parametrize('chunksize', [10, 1000])
-def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
+def test_fetch_openml_iris_pandas(monkeypatch):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')
     CategoricalDtype = pd.api.types.CategoricalDtype
@@ -299,8 +299,7 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
 
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
-    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False,
-                         chunksize=chunksize)
+    bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False)
     data = bunch.data
     target = bunch.target
     frame = bunch.frame
@@ -321,8 +320,7 @@ def test_fetch_openml_iris_pandas(monkeypatch, chunksize):
     assert np.all(frame.dtypes == data_dtypes + [target_dtype])
 
 
-@pytest.mark.parametrize('chunksize', [10, 1000])
-def test_fetch_openml_iris_multitarget_pandas(monkeypatch, chunksize):
+def test_fetch_openml_iris_multitarget_pandas(monkeypatch):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')
     CategoricalDtype = pd.api.types.CategoricalDtype
@@ -342,7 +340,7 @@ def test_fetch_openml_iris_multitarget_pandas(monkeypatch, chunksize):
     _monkey_patch_webbased_functions(monkeypatch, data_id, True)
 
     bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False,
-                         chunksize=chunksize, target_column=target_column)
+                         target_column=target_column)
     data = bunch.data
     target = bunch.target
     frame = bunch.frame
@@ -453,11 +451,22 @@ def test_fetch_openml_australian_pandas_error_sparse(monkeypatch):
         fetch_openml(data_id=data_id, as_frame=True, cache=False)
 
 
+def test_convert_arff_data_dataframe_warning_low_memory_pandas(monkeypatch):
+    pytest.importorskip('pandas')
+
+    data_id = 1119
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+
+    msg = 'Could not adhere to working_memory config.'
+    with pytest.warns(UserWarning, match=msg):
+        with config_context(working_memory=1e-6):
+            fetch_openml(data_id=data_id, as_frame=True, cache=False)
+
+
 def test_fetch_openml_adultcensus_pandas_return_X_y(monkeypatch):
     pd = pytest.importorskip('pandas')
     CategoricalDtype = pd.api.types.CategoricalDtype
 
-    # Check because of the numeric row attribute (issue #12329)
     data_id = 1119
     data_shape = (10, 14)
     target_shape = (10, )

From 36d11e32551917f5720177e425a0ca0b73415ddb Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Wed, 5 Jun 2019 15:56:57 -0400
Subject: [PATCH 27/39] TST Fix error message

---
 sklearn/datasets/tests/test_openml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 4e8db429e5d64..361d59991ce0a 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -1071,8 +1071,8 @@ def test_string_attribute_without_dataframe(monkeypatch, gzip_response):
     _monkey_patch_webbased_functions(monkeypatch, data_id, gzip_response)
     # single column test
     assert_raise_message(ValueError,
-                         ('STRING attributes are not supported for arrays as '
-                          'a return value. Try as_frame=True'),
+                         ('STRING attributes are not supported for '
+                          'array representation. Try as_frame=True'),
                          fetch_openml, data_id=data_id, cache=False)
 
 
From 7edb62cc3a477d28541afe2bd75a2557d3ffe9f7 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Wed, 5 Jun 2019 16:44:25 -0400
Subject: [PATCH 28/39] DOC Adds frame to docs

---
 sklearn/datasets/openml.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 9b93a9dd32e40..9cc1eca8e5d9d 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -528,7 +528,7 @@ def _valid_data_column_names(features_list, target_columns):
 
 def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                  target_column='default-target', cache=True, return_X_y=False,
-                 as_frame=False, chunksize=5000):
+                 as_frame=False):
     """Fetch dataset from openml by name or dataset id.
 
     Datasets are uniquely identified by either an integer ID or by a
@@ -607,6 +607,9 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
             is True, this is None.
         details : dict
             More metadata from OpenML
+        frame : pandas DataFrame
+            DataFrame with ``data`` and ``target``. This is set when
+            ``as_frame`` is True.
 
     (data, target) : tuple if ``return_X_y`` is True
 

From fb10fd110458619744cf122ccb4c9c6c644a9a3f Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Wed, 5 Jun 2019 16:49:09 -0400
Subject: [PATCH 29/39] BLD Trigger CI


From 87cc0b08acbd47d437c307a4724918404926661d Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Thu, 13 Jun 2019 14:21:10 -0400
Subject: [PATCH 30/39] CLN Only create bunch once

---
 sklearn/datasets/openml.py | 80 +++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 9cc1eca8e5d9d..3d899e1e4c08e 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -743,58 +743,56 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     description = "{}\n\nDownloaded from openml.org.".format(
         data_description.pop('description'))
 
+    nominal_attributes = None
+    frame = None
     if as_frame:
         columns = data_columns + target_column
-        df = _convert_arff_data_dataframe(arff, columns, features_dict)
-        X = df[data_columns]
+        frame = _convert_arff_data_dataframe(arff, columns, features_dict)
+        X = frame[data_columns]
         if len(target_column) >= 2:
-            y = df[target_column]
+            y = frame[target_column]
         elif len(target_column) == 1:
-            y = df[target_column[0]]
+            y = frame[target_column[0]]
         else:
             y = None
-        if return_X_y:
-            return X, y
-        return Bunch(frame=df, data=X, target=y,
-                     feature_names=data_columns, DESCR=description,
-                     details=data_description, categories=None,
-                     url="https://www.openml.org/d/{}".format(data_id))
-
-    # nominal attributes is a dict mapping from the attribute name to the
-    # possible values. Includes also the target column (which will be popped
-    # off below, before it will be packed in the Bunch object)
-    nominal_attributes = {k: v for k, v in arff['attributes']
-                          if isinstance(v, list) and
-                          k in data_columns + target_column}
-
-    X, y = _convert_arff_data(arff['data'], col_slice_x, col_slice_y, shape)
-
-    is_classification = {col_name in nominal_attributes
-                         for col_name in target_column}
-    if not is_classification:
-        # No target
-        pass
-    elif all(is_classification):
-        y = np.hstack([np.take(np.asarray(nominal_attributes.pop(col_name),
-                                          dtype='O'),
-                               y[:, i:i+1].astype(int, copy=False))
-                       for i, col_name in enumerate(target_column)])
-    elif any(is_classification):
-        raise ValueError('Mix of nominal and non-nominal targets is not '
-                         'currently supported')
-
-    # reshape y back to 1-D array, if there is only 1 target column; back
-    # to None if there are not target columns
-    if y.shape[1] == 1:
-        y = y.reshape((-1,))
-    elif y.shape[1] == 0:
-        y = None
+    else:
+        # nominal attributes is a dict mapping from the attribute name to the
+        # possible values. Includes also the target column (which will be popped
+        # off below, before it will be packed in the Bunch object)
+        nominal_attributes = {k: v for k, v in arff['attributes']
+                              if isinstance(v, list) and
+                              k in data_columns + target_column}
+
+        X, y = _convert_arff_data(arff['data'], col_slice_x, col_slice_y, shape)
+
+        is_classification = {col_name in nominal_attributes
+                             for col_name in target_column}
+        if not is_classification:
+            # No target
+            pass
+        elif all(is_classification):
+            y = np.hstack([
+                np.take(
+                    np.asarray(nominal_attributes.pop(col_name), dtype='O'),
+                    y[:, i:i + 1].astype(int, copy=False))
+                for i, col_name in enumerate(target_column)
+            ])
+        elif any(is_classification):
+            raise ValueError('Mix of nominal and non-nominal targets is not '
+                             'currently supported')
+
+        # reshape y back to 1-D array, if there is only 1 target column; back
+        # to None if there are not target columns
+        if y.shape[1] == 1:
+            y = y.reshape((-1,))
+        elif y.shape[1] == 0:
+            y = None
 
     if return_X_y:
         return X, y
 
     bunch = Bunch(
-        data=X, target=y, feature_names=data_columns,
+        data=X, target=y, frame=frame, feature_names=data_columns,
         DESCR=description, details=data_description,
         categories=nominal_attributes,
         url="https://www.openml.org/d/{}".format(data_id))

From ebd12f546799a13e1613d6a84654718382bc46a7 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Thu, 13 Jun 2019 15:58:38 -0400
Subject: [PATCH 31/39] CLN Uses target_columns (plural)

---
 sklearn/datasets/openml.py | 56 ++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 3d899e1e4c08e..d9393e578f568 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -21,6 +21,7 @@
 from .base import get_data_home
 from urllib.error import HTTPError
 from ..utils import Bunch
+from ..utils import get_chunk_n_rows
 from .. import get_config
 from ..utils import check_pandas_support  # noqa
 
@@ -320,18 +321,11 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict):
     arrf_columns = list(attributes)
 
     # calculate chunksize
-    working_memory = get_config()['working_memory']
     first_row = next(arrf['data'])
     first_df = pd.DataFrame([first_row], columns=arrf_columns)
 
     row_bytes = first_df.memory_usage(deep=True).sum()
-    chunksize = int(working_memory * (2 ** 20) // row_bytes)
-
-    if chunksize < 1:
-        warnings.warn('Could not adhere to working_memory config. '
-                      'Currently %.0fMiB, %.0fMiB required.' %
-                      (working_memory, np.ceil(row_bytes * 2 ** -20)))
-        chunksize = 1
+    chunksize = get_chunk_n_rows(row_bytes)
 
     # read arrf data with chunks
     dfs = []
@@ -582,8 +576,11 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         below for more information about the `data` and `target` objects.
 
     as_frame : boolean, default=False
-        If True, returns a Bunch where the data attribute is a pandas
-        DataFrame.
+        If True, where the data is a pandas DataFrame including columns with
+        appropriate dtypes (numeric, string or categorical). The target is
+        a pandas DataFrame or Series depending on the number of target_columns.
+        If ``return_X_y`` is True, then ``(data, target)`` will be pandas
+        DataFrames or Series as describe above.
 
     Returns
     -------
@@ -673,9 +670,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     if data_description['format'].lower() == 'sparse_arff':
         return_sparse = True
 
-    if as_frame:
-        if return_sparse:
-            raise ValueError('Cannot return dataframe with sparse data')
+    if as_frame and return_sparse:
+        raise ValueError('Cannot return dataframe with sparse data')
 
     # download data features, meta-info about column types
     features_list = _get_data_features(data_id, data_home)
@@ -692,28 +688,30 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         # determines the default target based on the data feature results
         # (which is currently more reliable than the data description;
         # see issue: https://github.com/openml/OpenML/issues/768)
-        target_column = [feature['name'] for feature in features_list
-                         if feature['is_target'] == 'true']
+        target_columns = [feature['name'] for feature in features_list
+                          if feature['is_target'] == 'true']
     elif isinstance(target_column, str):
         # for code-simplicity, make target_column by default a list
-        target_column = [target_column]
+        target_columns = [target_column]
     elif target_column is None:
-        target_column = []
-    elif not isinstance(target_column, list):
+        target_columns = []
+    elif isinstance(target_column, list):
+        target_columns = target_column
+    else:
         raise TypeError("Did not recognize type of target_column"
                         "Should be str, list or None. Got: "
                         "{}".format(type(target_column)))
     data_columns = _valid_data_column_names(features_list,
-                                            target_column)
+                                            target_columns)
 
     # prepare which columns and data types should be returned for the X and y
     features_dict = {feature['name']: feature for feature in features_list}
 
     # XXX: col_slice_y should be all nominal or all numeric
-    _verify_target_data_type(features_dict, target_column)
+    _verify_target_data_type(features_dict, target_columns)
 
     col_slice_y = [int(features_dict[col_name]['index'])
-                   for col_name in target_column]
+                   for col_name in target_columns]
 
     col_slice_x = [int(features_dict[col_name]['index'])
                    for col_name in data_columns]
@@ -746,13 +744,13 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
     nominal_attributes = None
     frame = None
     if as_frame:
-        columns = data_columns + target_column
+        columns = data_columns + target_columns
         frame = _convert_arff_data_dataframe(arff, columns, features_dict)
         X = frame[data_columns]
-        if len(target_column) >= 2:
-            y = frame[target_column]
-        elif len(target_column) == 1:
-            y = frame[target_column[0]]
+        if len(target_columns) >= 2:
+            y = frame[target_columns]
+        elif len(target_columns) == 1:
+            y = frame[target_columns[0]]
         else:
             y = None
     else:
@@ -761,12 +759,12 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         # off below, before it will be packed in the Bunch object)
         nominal_attributes = {k: v for k, v in arff['attributes']
                               if isinstance(v, list) and
-                              k in data_columns + target_column}
+                              k in data_columns + target_columns}
 
         X, y = _convert_arff_data(arff['data'], col_slice_x, col_slice_y, shape)
 
         is_classification = {col_name in nominal_attributes
-                             for col_name in target_column}
+                             for col_name in target_columns}
         if not is_classification:
             # No target
             pass
@@ -775,7 +773,7 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
                 np.take(
                     np.asarray(nominal_attributes.pop(col_name), dtype='O'),
                     y[:, i:i + 1].astype(int, copy=False))
-                for i, col_name in enumerate(target_column)
+                for i, col_name in enumerate(target_columns)
             ])
         elif any(is_classification):
             raise ValueError('Mix of nominal and non-nominal targets is not '

From 7cd6f30dd69ddc9ba34a03c697670116a8e37347 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Thu, 13 Jun 2019 16:07:54 -0400
Subject: [PATCH 32/39] CLN Filter columns sooner

---
 sklearn/datasets/openml.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index d9393e578f568..de31977d0dede 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -319,10 +319,11 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict):
 
     attributes = OrderedDict(arrf['attributes'])
     arrf_columns = list(attributes)
+    columns_to_keep = [col for col in arrf_columns if col in columns]
 
     # calculate chunksize
     first_row = next(arrf['data'])
-    first_df = pd.DataFrame([first_row], columns=arrf_columns)
+    first_df = pd.DataFrame([first_row], columns=arrf_columns)[columns_to_keep]
 
     row_bytes = first_df.memory_usage(deep=True).sum()
     chunksize = get_chunk_n_rows(row_bytes)
@@ -331,15 +332,9 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict):
     dfs = []
     dfs.append(first_df)
     for data in _chunk_generator(arrf['data'], chunksize):
-        dfs.append(pd.DataFrame(data, columns=arrf_columns))
+        dfs.append(pd.DataFrame(data, columns=arrf_columns)[columns_to_keep])
     df = pd.concat(dfs)
 
-    columns_to_keep = [col for col in arrf_columns if col in columns]
-
-    # copy dataframe when there are columns that needs to be removed
-    if len(columns_to_keep) != len(arrf_columns):
-        df = df[columns_to_keep].copy()
-
     for column in columns_to_keep:
         dtype = _feature_to_dtype(features_dict[column])
         if dtype == 'category':

From 00274d755764fb56d6acf45b06db2c44e9d69658 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Thu, 13 Jun 2019 16:13:01 -0400
Subject: [PATCH 33/39] CLN Filter earlier

---
 sklearn/datasets/openml.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index de31977d0dede..ea441d4421411 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -319,18 +319,18 @@ def _convert_arff_data_dataframe(arrf, columns, features_dict):
 
     attributes = OrderedDict(arrf['attributes'])
     arrf_columns = list(attributes)
-    columns_to_keep = [col for col in arrf_columns if col in columns]
 
     # calculate chunksize
     first_row = next(arrf['data'])
-    first_df = pd.DataFrame([first_row], columns=arrf_columns)[columns_to_keep]
+    first_df = pd.DataFrame([first_row], columns=arrf_columns)
 
     row_bytes = first_df.memory_usage(deep=True).sum()
     chunksize = get_chunk_n_rows(row_bytes)
 
     # read arrf data with chunks
+    columns_to_keep = [col for col in arrf_columns if col in columns]
     dfs = []
-    dfs.append(first_df)
+    dfs.append(first_df[columns_to_keep])
     for data in _chunk_generator(arrf['data'], chunksize):
         dfs.append(pd.DataFrame(data, columns=arrf_columns)[columns_to_keep])
     df = pd.concat(dfs)

From 65c575c607705e5d25e42168ee53deb47aa1d2ea Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Thu, 13 Jun 2019 16:18:12 -0400
Subject: [PATCH 34/39] STY Flake8

---
 sklearn/datasets/openml.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index ea441d4421411..63139a6cce37f 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -10,7 +10,6 @@
 from collections.abc import Generator
 from itertools import islice
 from collections import OrderedDict
-import warnings
 
 from urllib.request import urlopen, Request
 
@@ -22,7 +21,6 @@
 from urllib.error import HTTPError
 from ..utils import Bunch
 from ..utils import get_chunk_n_rows
-from .. import get_config
 from ..utils import check_pandas_support  # noqa
 
 __all__ = ['fetch_openml']
@@ -750,13 +748,14 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
             y = None
     else:
         # nominal attributes is a dict mapping from the attribute name to the
-        # possible values. Includes also the target column (which will be popped
-        # off below, before it will be packed in the Bunch object)
+        # possible values. Includes also the target column (which will be
+        # popped off below, before it will be packed in the Bunch object)
         nominal_attributes = {k: v for k, v in arff['attributes']
                               if isinstance(v, list) and
                               k in data_columns + target_columns}
 
-        X, y = _convert_arff_data(arff['data'], col_slice_x, col_slice_y, shape)
+        X, y = _convert_arff_data(arff['data'], col_slice_x,
+                                  col_slice_y, shape)
 
         is_classification = {col_name in nominal_attributes
                              for col_name in target_columns}

From 52211bb19ff07c41fc6abe08c57ab9d1d8e9c1e4 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 25 Jun 2019 09:28:41 -0400
Subject: [PATCH 35/39] TST Adds check for all numerical data

---
 sklearn/datasets/tests/test_openml.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 361d59991ce0a..de13f96675f16 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -20,6 +20,7 @@
 from sklearn.utils.testing import (assert_warns_message,
                                    assert_raise_message)
 from sklearn.utils import is_scalar_nan
+from sklearn.utils.testing import assert_allclose, assert_array_equal
 from urllib.error import HTTPError
 from sklearn.datasets.tests.test_common import check_return_X_y
 from functools import partial
@@ -320,6 +321,25 @@ def test_fetch_openml_iris_pandas(monkeypatch):
     assert np.all(frame.dtypes == data_dtypes + [target_dtype])
 
 
+def test_fetch_openml_iris_pandas_equal_to_no_frame(monkeypatch):
+    # as_frame = True returns the same underlying data as as_frame = False
+    pytest.importorskip('pandas')
+    data_id = 61
+
+    _monkey_patch_webbased_functions(monkeypatch, data_id, True)
+
+    frame_bunch = fetch_openml(data_id=data_id, as_frame=True, cache=False)
+    frame_data = frame_bunch.data
+    frame_target = frame_bunch.target
+
+    norm_bunch = fetch_openml(data_id=data_id, as_frame=False, cache=False)
+    norm_data = norm_bunch.data
+    norm_target = norm_bunch.target
+
+    assert_allclose(norm_data, frame_data)
+    assert_array_equal(norm_target, frame_target)
+
+
 def test_fetch_openml_iris_multitarget_pandas(monkeypatch):
     # classification dataset with numeric only columns
     pd = pytest.importorskip('pandas')

From 8b5610bf9f654faf8cecd68f9a82823f806df936 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 2 Jul 2019 11:22:06 -0400
Subject: [PATCH 36/39] CLN Moves _chunk_generator to utils

---
 sklearn/datasets/openml.py | 13 +------------
 sklearn/utils/__init__.py  | 12 ++++++++++++
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 63139a6cce37f..413091ec192fa 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -8,7 +8,6 @@
 from functools import wraps
 import itertools
 from collections.abc import Generator
-from itertools import islice
 from collections import OrderedDict
 
 from urllib.request import urlopen, Request
@@ -21,6 +20,7 @@
 from urllib.error import HTTPError
 from ..utils import Bunch
 from ..utils import get_chunk_n_rows
+from ..utils import _chunk_generator
 from ..utils import check_pandas_support  # noqa
 
 __all__ = ['fetch_openml']
@@ -284,17 +284,6 @@ def _feature_to_dtype(feature):
     raise ValueError('Unsupported feature: {}'.format(feature))
 
 
-def _chunk_generator(gen, chunksize):
-    """Chunk generator, ``gen`` into lists of length ``chunksize``. The last
-    chunk may have a length less than ``chunksize``."""
-    while True:
-        chunk = list(islice(gen, chunksize))
-        if chunk:
-            yield chunk
-        else:
-            return
-
-
 def _convert_arff_data_dataframe(arrf, columns, features_dict):
     """Convert the ARFF object into a pandas DataFrame.
 
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 8216ff49ba5c5..4528c2ba0caeb 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -3,6 +3,7 @@
 """
 from collections.abc import Sequence
 from contextlib import contextmanager
+from itertools import islice
 import numbers
 import platform
 import struct
@@ -477,6 +478,17 @@ def safe_sqr(X, copy=True):
     return X
 
 
+def _chunk_generator(gen, chunksize):
+    """Chunk generator, ``gen`` into lists of length ``chunksize``. The last
+    chunk may have a length less than ``chunksize``."""
+    while True:
+        chunk = list(islice(gen, chunksize))
+        if chunk:
+            yield chunk
+        else:
+            return
+
+
 def gen_batches(n, batch_size, min_batch_size=0):
     """Generator to create slices containing batch_size elements, from 0 to n.
 

From 3873332fd4fcdbe6ef16004084aca5aba30b12d1 Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 2 Jul 2019 11:27:01 -0400
Subject: [PATCH 37/39] DOC Adds whats_new

---
 doc/whats_new/v0.22.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index e998294e6d255..336a13e7c3bea 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -39,6 +39,12 @@ Changelog
     :pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
     where 123456 is the *pull request* number, not the issue number.
 
+:mod:`sklearn.datasets`
+.......................
+
+- |Feature| :func:`fetch_openml` now supports heterogeneous data using pandas 
+  by setting `as_frame=True`. :pr:`13902` by `Thomas Fan`_.
+
 :mod:`sklearn.ensemble`
 .......................
 

From e14742038077ded71bbff083024048a5cc8702e5 Mon Sep 17 00:00:00 2001
From: Thomas J Fan <thomasjpfan@gmail.com>
Date: Tue, 2 Jul 2019 11:58:57 -0400
Subject: [PATCH 38/39] CLN: Update doc/whats_new/v0.22.rst

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 doc/whats_new/v0.22.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index e3bf4f8a1ca30..00d816d30c023 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -42,7 +42,7 @@ Changelog
 :mod:`sklearn.datasets`
 .......................
 
-- |Feature| :func:`fetch_openml` now supports heterogeneous data using pandas 
+- |Feature| :func:`datasets.fetch_openml` now supports heterogeneous data using pandas
   by setting `as_frame=True`. :pr:`13902` by `Thomas Fan`_.
 
 :mod:`sklearn.decomposition`

From c34707eb533860fcd4a6d0a7a4afb126e5e3e92c Mon Sep 17 00:00:00 2001
From: Thomas Fan <thomasjpfan@gmail.com>
Date: Tue, 2 Jul 2019 13:23:11 -0400
Subject: [PATCH 39/39] CLN Address comments

---
 examples/compose/plot_column_transformer_mixed_types.py | 4 ++++
 sklearn/datasets/openml.py                              | 9 +++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 162d31832417c..0f6c5d3c222c6 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -41,6 +41,10 @@
 X = titantic.data
 y = titantic.target
 
+# Alternatively X and y can be obtained directly from the frame attribute:
+# X = titantic.frame.drop('survived', axis=1)
+# y = titantic.frame['survived']
+
 # We will train our classifier with the following features:
 # Numeric Features:
 # - age: float.
diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py
index 413091ec192fa..3d82027e29118 100644
--- a/sklearn/datasets/openml.py
+++ b/sklearn/datasets/openml.py
@@ -558,10 +558,11 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         below for more information about the `data` and `target` objects.
 
     as_frame : boolean, default=False
-        If True, where the data is a pandas DataFrame including columns with
+        If True, the data is a pandas DataFrame including columns with
         appropriate dtypes (numeric, string or categorical). The target is
         a pandas DataFrame or Series depending on the number of target_columns.
-        If ``return_X_y`` is True, then ``(data, target)`` will be pandas
+        The Bunch will contain a ``frame`` attribute with the target and the
+        data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas
         DataFrames or Series as describe above.
 
     Returns
@@ -587,8 +588,8 @@ def fetch_openml(name=None, version='active', data_id=None, data_home=None,
         details : dict
             More metadata from OpenML
         frame : pandas DataFrame
-            DataFrame with ``data`` and ``target``. This is set when
-            ``as_frame`` is True.
+            Only present when `as_frame=True`. DataFrame with ``data`` and
+            ``target``.
 
     (data, target) : tuple if ``return_X_y`` is True