From 33868d1edda783fbfaf3cd5476a3441a01b51b7d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 5 Jun 2019 15:55:11 +0200 Subject: [PATCH 01/86] TST add test to ensure support of pipeline in PDP --- .../tests/test_partial_dependence.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 0b9405e471741..1ed74a2e3c347 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -23,6 +23,8 @@ from sklearn.cluster import KMeans from sklearn.metrics import r2_score from sklearn.preprocessing import PolynomialFeatures +from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import make_pipeline from sklearn.dummy import DummyClassifier from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.testing import assert_allclose @@ -393,6 +395,31 @@ def test_partial_dependence_sample_weight(): assert np.corrcoef(pdp, values)[0, 1] > 0.99 +def test_partial_dependence_pipeline(): + # check that the partial dependence support pipeline + iris = load_iris() + + scaler = StandardScaler() + clf = DummyClassifier(random_state=42) + pipe = make_pipeline(scaler, clf) + + clf.fit(scaler.fit_transform(iris.data), iris.target) + pipe.fit(iris.data, iris.target) + + features = 0 + pdp_pipe, values_pipe = partial_dependence( + pipe, iris.data, features=[features] + ) + pdp_clf, values_clf = partial_dependence( + clf, scaler.transform(iris.data), features=[features] + ) + assert_allclose(pdp_pipe, pdp_clf) + assert_allclose( + values_pipe[0], + (values_clf[0] * scaler.scale_[features]) + scaler.mean_[features] + ) + + def test_plot_partial_dependence(pyplot): # Test partial dependence plot function. boston = load_boston() From f2035fe1e0a1389029e38cce17882ecc0daae8fc Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 5 Jun 2019 17:27:12 +0200 Subject: [PATCH 02/86] EHN add support for dataframe in PDP --- sklearn/compose/_column_transformer.py | 18 ++++- sklearn/inspection/partial_dependence.py | 75 ++++++++++++------- .../tests/test_partial_dependence.py | 67 ++++++++++++++++- sklearn/utils/testing.py | 11 +++ 4 files changed, 136 insertions(+), 35 deletions(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 1bfae5d200e13..5c234611e6aa5 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -649,7 +649,12 @@ def _get_column_indices(X, key): if (_check_key_type(key, int) or hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_)): # Convert key into positive indexes - idx = np.arange(n_columns)[key] + try: + idx = np.arange(n_columns)[key] + except IndexError as e: + raise ValueError( + 'all features must be in [0, %d]' % (n_columns - 1) + ) from e return np.atleast_1d(idx).tolist() elif _check_key_type(key, str): try: @@ -672,7 +677,16 @@ def _get_column_indices(X, key): else: columns = list(key) - return [all_columns.index(col) for col in columns] + try: + column_indices = [all_columns.index(col) for col in columns] + except ValueError as e: + if 'not in list' in str(e): + raise ValueError( + "A given feature is not a column of the dataframe" + ) from e + raise + + return column_indices else: raise ValueError("No valid specification of the columns. 
Only a " "scalar, list or slice of all integers or all " diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 3191dcd7a1352..d9f2758dbcb21 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -11,10 +11,15 @@ from collections.abc import Iterable import numpy as np +from scipy import sparse from scipy.stats.mstats import mquantiles from joblib import Parallel, delayed +from ..compose._column_transformer import _get_column +from ..compose._column_transformer import _get_column_indices + from ..base import is_classifier, is_regressor +from ..pipeline import Pipeline from ..utils.extmath import cartesian from ..utils import check_array from ..utils import check_matplotlib_support # noqa @@ -70,16 +75,16 @@ def _grid_from_X(X, percentiles, grid_resolution): values = [] for feature in range(X.shape[1]): - uniques = np.unique(X[:, feature]) + uniques = np.unique(_get_column(X, feature)) if uniques.shape[0] < grid_resolution: # feature has low resolution use unique vals axis = uniques else: # create axis based on percentiles and grid resolution - emp_percentiles = mquantiles(X[:, feature], prob=percentiles, - axis=0) - if np.allclose(emp_percentiles[0], - emp_percentiles[1]): + emp_percentiles = mquantiles( + _get_column(X, feature), prob=percentiles, axis=0 + ) + if np.allclose(emp_percentiles[0], emp_percentiles[1]): raise ValueError( 'percentiles are too close to each other, ' 'unable to build the grid. Please choose percentiles ' @@ -146,7 +151,10 @@ def _partial_dependence_brute(est, grid, features, X, response_method): for new_values in grid: X_eval = X.copy() for i, variable in enumerate(features): - X_eval[:, variable] = new_values[i] + if hasattr(X_eval, 'iloc'): + X_eval.iloc[:, variable] = new_values[i] + else: + X_eval[:, variable] = new_values[i] try: predictions = prediction_method(X_eval) @@ -203,6 +211,7 @@ def partial_dependence(estimator, X, features, response_method='auto', ``X`` is used both to generate a grid of values for the ``features``, and to compute the averaged predictions when method is 'brute'. + # TODO: update the type accepted features : list or array-like of int The target features for which the partial dependency should be computed. @@ -284,16 +293,20 @@ def partial_dependence(estimator, X, features, response_method='auto', dependence values are incorrect for 'recursion'. """ + preprocessor = estimator[:-1] if isinstance(estimator, Pipeline) else None + final_estimator = (estimator[-1] if isinstance(estimator, Pipeline) + else estimator) - if not (is_classifier(estimator) or is_regressor(estimator)): + if not (is_classifier(final_estimator) or is_regressor(final_estimator)): raise ValueError( "'estimator' must be a fitted regressor or classifier.") - if (hasattr(estimator, 'classes_') and - isinstance(estimator.classes_[0], np.ndarray)): + if (hasattr(final_estimator, 'classes_') and + isinstance(final_estimator.classes_[0], np.ndarray)): raise ValueError('Multiclass-multioutput estimators are not supported') - X = check_array(X) + if not(hasattr(X, '__array__') or sparse.issparse(X)): + X = check_array(X, force_all_finite='allow-nan', dtype=np.object) accepted_responses = ('auto', 'predict_proba', 'decision_function') if response_method not in accepted_responses: @@ -301,7 +314,7 @@ def partial_dependence(estimator, X, features, response_method='auto', 'response_method {} is invalid. 
Accepted response_method names ' 'are {}.'.format(response_method, ', '.join(accepted_responses))) - if is_regressor(estimator) and response_method != 'auto': + if is_regressor(final_estimator) and response_method != 'auto': raise ValueError( "The response_method parameter is ignored for regressors and " "must be 'auto'." @@ -313,14 +326,14 @@ def partial_dependence(estimator, X, features, response_method='auto', method, ', '.join(accepted_methods))) if method == 'auto': - if (isinstance(estimator, BaseGradientBoosting) and - estimator.init is None): + if (isinstance(final_estimator, BaseGradientBoosting) and + final_estimator.init is None): method = 'recursion' else: method = 'brute' if method == 'recursion': - if not isinstance(estimator, BaseGradientBoosting): + if not isinstance(final_estimator, BaseGradientBoosting): raise ValueError( "'estimator' must be an instance of BaseGradientBoosting " "for the 'recursion' method. Try using method='brute'.") @@ -332,27 +345,31 @@ def partial_dependence(estimator, X, features, response_method='auto', "With the 'recursion' method, the response_method must be " "'decision_function'. Got {}.".format(response_method) ) - check_is_fitted(estimator, 'estimators_', + check_is_fitted(final_estimator, 'estimators_', msg="'estimator' parameter must be a fitted estimator") - # Note: if method is brute, this check is done at prediction time - n_features = estimator.n_features_ + + features_indices = np.asarray( + _get_column_indices(X, features), dtype=np.int32, order='C' + ).ravel() + + if method == 'recursion' and preprocessor is not None: + X_preprocessed = preprocessor.transform(X) else: - n_features = X.shape[1] + X_preprocessed = X - features = np.asarray(features, dtype=np.int32, order='C').ravel() - if any(not (0 <= f < n_features) for f in features): - raise ValueError('all features must be in [0, %d]' - % (n_features - 1)) + grid, values = _grid_from_X( + _get_column(X_preprocessed, features_indices), percentiles, + grid_resolution + ) - grid, values = _grid_from_X(X[:, features], percentiles, - grid_resolution) if method == 'brute': - averaged_predictions = _partial_dependence_brute(estimator, grid, - features, X, - response_method) + averaged_predictions = _partial_dependence_brute( + estimator, grid, features_indices, X, response_method + ) else: - averaged_predictions = _partial_dependence_recursion(estimator, grid, - features) + averaged_predictions = _partial_dependence_recursion( + final_estimator, grid, features_indices + ) # reshape averaged_predictions to # (n_outputs, n_values_feature_0, n_values_feature_1, ...) 
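
A minimal sketch (illustrative, made-up data) of why the ``iloc`` branch added to
_partial_dependence_brute above is needed: positional column assignment behaves
differently for NumPy arrays and pandas DataFrames.

    import numpy as np
    import pandas as pd

    X_arr = np.array([[1.0, 2.0], [3.0, 4.0]])
    X_df = pd.DataFrame(X_arr, columns=['a', 'b'])

    X_arr[:, 0] = 0.5       # positional assignment works directly on ndarrays
    X_df.iloc[:, 0] = 0.5   # DataFrames need .iloc for positional assignment
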
diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 1ed74a2e3c347..88946aa770436 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -21,14 +21,17 @@ from sklearn.datasets import load_boston, load_iris from sklearn.datasets import make_classification, make_regression from sklearn.cluster import KMeans +from sklearn.compose import make_column_transformer from sklearn.metrics import r2_score from sklearn.preprocessing import PolynomialFeatures from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import RobustScaler from sklearn.pipeline import make_pipeline from sklearn.dummy import DummyClassifier -from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal +from sklearn.utils.testing import skip_if_no_pandas # toy sample @@ -44,6 +47,9 @@ regression_data = (make_regression(random_state=0), 1) multioutput_regression_data = (make_regression(n_targets=2, random_state=0), 2) +# iris +iris = load_iris() + @pytest.mark.parametrize('Estimator, method, data', [ (GradientBoostingClassifier, 'recursion', binary_classification_data), @@ -244,7 +250,6 @@ def test_partial_dependence_easy_target(est, power): assert r2 > .99 -@pytest.mark.filterwarnings('ignore:The default value of ') # 0.22 @pytest.mark.parametrize('Estimator', (sklearn.tree.DecisionTreeClassifier, sklearn.tree.ExtraTreeClassifier, @@ -321,16 +326,32 @@ def test_partial_dependence_error(estimator, params, err_msg): 'estimator', [LinearRegression(), GradientBoostingClassifier(random_state=0)] ) -@pytest.mark.parametrize('features', [-1, 1000000]) -def test_partial_dependence_unknown_feature(estimator, features): +def test_partial_dependence_unknown_feature_indices(estimator): X, y = make_classification(random_state=0) estimator.fit(X, y) + features = 100000 err_msg = 'all features must be in' with pytest.raises(ValueError, match=err_msg): partial_dependence(estimator, X, [features]) +@pytest.mark.parametrize( + 'estimator', + [LinearRegression(), GradientBoostingClassifier(random_state=0)] +) +def test_partial_dependence_unknown_feature_string(estimator): + pd = pytest.importorskip("pandas") + X, y = make_classification(random_state=0) + df = pd.DataFrame(X) + estimator.fit(df, y) + + features = 'random' + err_msg = 'A given feature is not a column of the dataframe' + with pytest.raises(ValueError, match=err_msg): + partial_dependence(estimator, df, [features]) + + @pytest.mark.parametrize( 'estimator', [LinearRegression(), GradientBoostingClassifier(random_state=0)] @@ -420,6 +441,44 @@ def test_partial_dependence_pipeline(): ) +@pytest.mark.parametrize( + "estimator", + [LogisticRegression(max_iter=1000, random_state=0), + GradientBoostingClassifier(random_state=0, n_estimators=5)], + ids=['estimator-brute', 'estimator-recursion'] +) +@pytest.mark.parametrize( + "preprocessor", + [None, + make_column_transformer((StandardScaler(), iris.feature_names[:2]), + (RobustScaler(), iris.feature_names[2:]))], + ids=['None', 'column-transformer'] +) +@pytest.mark.parametrize( + "features", + [[0, 1], iris.feature_names[:2]], + ids=['features-integer', 'features-string'] +) +def test_partial_dependence_dataframe(estimator, preprocessor, features): + # check that the partial dependence support dataframe + pd = 
pytest.importorskip("pandas") + df = pd.DataFrame(iris.data, columns=iris.feature_names) + + pipe = make_pipeline(preprocessor, estimator) + pipe.fit(df, iris.target) + pdp_pipe, values_pipe = partial_dependence(pipe, df, features=features) + + X_preprocessed = (clone(preprocessor).fit_transform(df) + if preprocessor is not None else df.values) + clf = clone(estimator).fit(X_preprocessed, iris.target) + pdp_clf, values_clf = partial_dependence( + clf, X_preprocessed, features=[0, 1] + ) + + assert_allclose(pdp_pipe, pdp_clf) + # assert_allclose(values_pipe, values_clf) + + def test_plot_partial_dependence(pyplot): # Test partial dependence plot function. boston = load_boston() diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 3ad4096c1d091..6b3f5649ea08e 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -581,6 +581,17 @@ def set_random_state(estimator, random_state=0): skip_if_no_parallel = pytest.mark.skipif(not joblib.parallel.mp, reason="joblib is in serial mode") + def has_pandas(): + try: + import pandas + return True + except ImportError: + return False + + skip_if_no_pandas = pytest.mark.skipif( + not has_pandas, reason="pandas is not installed" + ) + # Decorator for tests involving both BLAS calls and multiprocessing. # # Under POSIX (e.g. Linux or OSX), using multiprocessing in conjunction From 133c11696035dae57834b837d3983dee8c595fcc Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 6 Jun 2019 14:47:24 +0200 Subject: [PATCH 03/86] revert to brute method for pipeline --- sklearn/inspection/partial_dependence.py | 31 ++++++---------- .../tests/test_partial_dependence.py | 35 +++++++++++++------ sklearn/utils/testing.py | 11 ------ 3 files changed, 35 insertions(+), 42 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index d9f2758dbcb21..ead089780cd3d 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -19,7 +19,6 @@ from ..compose._column_transformer import _get_column_indices from ..base import is_classifier, is_regressor -from ..pipeline import Pipeline from ..utils.extmath import cartesian from ..utils import check_array from ..utils import check_matplotlib_support # noqa @@ -293,16 +292,12 @@ def partial_dependence(estimator, X, features, response_method='auto', dependence values are incorrect for 'recursion'. """ - preprocessor = estimator[:-1] if isinstance(estimator, Pipeline) else None - final_estimator = (estimator[-1] if isinstance(estimator, Pipeline) - else estimator) - - if not (is_classifier(final_estimator) or is_regressor(final_estimator)): + if not (is_classifier(estimator) or is_regressor(estimator)): raise ValueError( "'estimator' must be a fitted regressor or classifier.") - if (hasattr(final_estimator, 'classes_') and - isinstance(final_estimator.classes_[0], np.ndarray)): + if (hasattr(estimator, 'classes_') and + isinstance(estimator.classes_[0], np.ndarray)): raise ValueError('Multiclass-multioutput estimators are not supported') if not(hasattr(X, '__array__') or sparse.issparse(X)): @@ -314,7 +309,7 @@ def partial_dependence(estimator, X, features, response_method='auto', 'response_method {} is invalid. 
Accepted response_method names ' 'are {}.'.format(response_method, ', '.join(accepted_responses))) - if is_regressor(final_estimator) and response_method != 'auto': + if is_regressor(estimator) and response_method != 'auto': raise ValueError( "The response_method parameter is ignored for regressors and " "must be 'auto'." @@ -326,14 +321,14 @@ def partial_dependence(estimator, X, features, response_method='auto', method, ', '.join(accepted_methods))) if method == 'auto': - if (isinstance(final_estimator, BaseGradientBoosting) and - final_estimator.init is None): + if (isinstance(estimator, BaseGradientBoosting) and + estimator.init is None): method = 'recursion' else: method = 'brute' if method == 'recursion': - if not isinstance(final_estimator, BaseGradientBoosting): + if not isinstance(estimator, BaseGradientBoosting): raise ValueError( "'estimator' must be an instance of BaseGradientBoosting " "for the 'recursion' method. Try using method='brute'.") @@ -345,21 +340,15 @@ def partial_dependence(estimator, X, features, response_method='auto', "With the 'recursion' method, the response_method must be " "'decision_function'. Got {}.".format(response_method) ) - check_is_fitted(final_estimator, 'estimators_', + check_is_fitted(estimator, 'estimators_', msg="'estimator' parameter must be a fitted estimator") features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' ).ravel() - if method == 'recursion' and preprocessor is not None: - X_preprocessed = preprocessor.transform(X) - else: - X_preprocessed = X - grid, values = _grid_from_X( - _get_column(X_preprocessed, features_indices), percentiles, - grid_resolution + _get_column(X, features_indices), percentiles, grid_resolution ) if method == 'brute': @@ -368,7 +357,7 @@ def partial_dependence(estimator, X, features, response_method='auto', ) else: averaged_predictions = _partial_dependence_recursion( - final_estimator, grid, features_indices + estimator, grid, features_indices ) # reshape averaged_predictions to diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 88946aa770436..5a3b82dc05f27 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -31,7 +31,6 @@ from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import skip_if_no_pandas # toy sample @@ -437,7 +436,7 @@ def test_partial_dependence_pipeline(): assert_allclose(pdp_pipe, pdp_clf) assert_allclose( values_pipe[0], - (values_clf[0] * scaler.scale_[features]) + scaler.mean_[features] + values_clf[0] * scaler.scale_[features] + scaler.mean_[features] ) @@ -450,13 +449,14 @@ def test_partial_dependence_pipeline(): @pytest.mark.parametrize( "preprocessor", [None, - make_column_transformer((StandardScaler(), iris.feature_names[:2]), - (RobustScaler(), iris.feature_names[2:]))], + make_column_transformer( + (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]), + (RobustScaler(), [iris.feature_names[i] for i in (1, 3)]))], ids=['None', 'column-transformer'] ) @pytest.mark.parametrize( "features", - [[0, 1], iris.feature_names[:2]], + [[0, 2], [iris.feature_names[i] for i in (0, 2)]], ids=['features-integer', 'features-string'] ) def test_partial_dependence_dataframe(estimator, preprocessor, features): @@ -468,15 +468,30 @@ def test_partial_dependence_dataframe(estimator, 
preprocessor, features): pipe.fit(df, iris.target) pdp_pipe, values_pipe = partial_dependence(pipe, df, features=features) - X_preprocessed = (clone(preprocessor).fit_transform(df) - if preprocessor is not None else df.values) - clf = clone(estimator).fit(X_preprocessed, iris.target) + # the column transformer will reorder the column when transforming + # we mixed the index to be sure that we are computing the partial + # dependence of the right columns + if preprocessor is not None: + X_proc = clone(preprocessor).fit_transform(df) + features_clf = [0, 1] + else: + X_proc = df + features_clf = [0, 2] + + clf = clone(estimator).fit(X_proc, iris.target) pdp_clf, values_clf = partial_dependence( - clf, X_preprocessed, features=[0, 1] + clf, X_proc, features=features_clf, method='brute' ) assert_allclose(pdp_pipe, pdp_clf) - # assert_allclose(values_pipe, values_clf) + if preprocessor is not None: + scaler = preprocessor.named_transformers_['standardscaler'] + assert_allclose( + values_pipe[1], + values_clf[1] * scaler.scale_[1] + scaler.mean_[1] + ) + else: + assert_allclose(values_pipe[1], values_clf[1]) def test_plot_partial_dependence(pyplot): diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index 6b3f5649ea08e..3ad4096c1d091 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -581,17 +581,6 @@ def set_random_state(estimator, random_state=0): skip_if_no_parallel = pytest.mark.skipif(not joblib.parallel.mp, reason="joblib is in serial mode") - def has_pandas(): - try: - import pandas - return True - except ImportError: - return False - - skip_if_no_pandas = pytest.mark.skipif( - not has_pandas, reason="pandas is not installed" - ) - # Decorator for tests involving both BLAS calls and multiprocessing. # # Under POSIX (e.g. Linux or OSX), using multiprocessing in conjunction From 79156f31c0a5eb1c3dfa0da64e6087d0df365adf Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 6 Jun 2019 15:21:00 +0200 Subject: [PATCH 04/86] refactor common part with columntransformer --- sklearn/compose/_column_transformer.py | 144 +----------------- sklearn/inspection/partial_dependence.py | 14 +- sklearn/utils/__init__.py | 179 ++++++++++++++++++++++- 3 files changed, 188 insertions(+), 149 deletions(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 5c234611e6aa5..b6521b6f0270b 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -17,6 +17,8 @@ from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch +from ..utils import safe_indexing +from ..utils import _get_column_indices from ..utils.metaestimators import _BaseComposition from ..utils.validation import check_array, check_is_fitted @@ -402,7 +404,7 @@ def _fit_transform(self, X, y, func, fitted=False): return Parallel(n_jobs=self.n_jobs)( delayed(func)( transformer=clone(trans) if not fitted else trans, - X=_get_column(X, column), + X=safe_indexing(X, column, axis=1), y=y, weight=weight, message_clsname='ColumnTransformer', @@ -553,146 +555,6 @@ def _check_X(X): return check_array(X, force_all_finite='allow-nan', dtype=np.object) -def _check_key_type(key, superclass): - """ - Check that scalar, list or slice is of a certain type. - - This is only used in _get_column and _get_column_indices to check - if the `key` (column specification) is fully integer or fully string-like. 
- - Parameters - ---------- - key : scalar, list, slice, array-like - The column specification to check - superclass : int or str - The type for which to check the `key` - - """ - if isinstance(key, superclass): - return True - if isinstance(key, slice): - return (isinstance(key.start, (superclass, type(None))) and - isinstance(key.stop, (superclass, type(None)))) - if isinstance(key, list): - return all(isinstance(x, superclass) for x in key) - if hasattr(key, 'dtype'): - if superclass is int: - return key.dtype.kind == 'i' - else: - # superclass = str - return key.dtype.kind in ('O', 'U', 'S') - return False - - -def _get_column(X, key): - """ - Get feature column(s) from input data X. - - Supported input types (X): numpy arrays, sparse arrays and DataFrames - - Supported key types (key): - - scalar: output is 1D - - lists, slices, boolean masks: output is 2D - - callable that returns any of the above - - Supported key data types: - - - integer or boolean mask (positional): - - supported for arrays, sparse matrices and dataframes - - string (key-based): - - only supported for dataframes - - So no keys other than strings are allowed (while in principle you - can use any hashable object as key). - - """ - # check whether we have string column names or integers - if _check_key_type(key, int): - column_names = False - elif _check_key_type(key, str): - column_names = True - elif hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_): - # boolean mask - column_names = False - if hasattr(X, 'loc'): - # pandas boolean masks don't work with iloc, so take loc path - column_names = True - else: - raise ValueError("No valid specification of the columns. Only a " - "scalar, list or slice of all integers or all " - "strings, or boolean mask is allowed") - - if column_names: - if hasattr(X, 'loc'): - # pandas dataframes - return X.loc[:, key] - else: - raise ValueError("Specifying the columns using strings is only " - "supported for pandas DataFrames") - else: - if hasattr(X, 'iloc'): - # pandas dataframes - return X.iloc[:, key] - else: - # numpy arrays, sparse arrays - return X[:, key] - - -def _get_column_indices(X, key): - """ - Get feature column indices for input data X and key. 
- - For accepted values of `key`, see the docstring of _get_column - - """ - n_columns = X.shape[1] - - if (_check_key_type(key, int) - or hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_)): - # Convert key into positive indexes - try: - idx = np.arange(n_columns)[key] - except IndexError as e: - raise ValueError( - 'all features must be in [0, %d]' % (n_columns - 1) - ) from e - return np.atleast_1d(idx).tolist() - elif _check_key_type(key, str): - try: - all_columns = list(X.columns) - except AttributeError: - raise ValueError("Specifying the columns using strings is only " - "supported for pandas DataFrames") - if isinstance(key, str): - columns = [key] - elif isinstance(key, slice): - start, stop = key.start, key.stop - if start is not None: - start = all_columns.index(start) - if stop is not None: - # pandas indexing with strings is endpoint included - stop = all_columns.index(stop) + 1 - else: - stop = n_columns + 1 - return list(range(n_columns)[slice(start, stop)]) - else: - columns = list(key) - - try: - column_indices = [all_columns.index(col) for col in columns] - except ValueError as e: - if 'not in list' in str(e): - raise ValueError( - "A given feature is not a column of the dataframe" - ) from e - raise - - return column_indices - else: - raise ValueError("No valid specification of the columns. Only a " - "scalar, list or slice of all integers or all " - "strings, or boolean mask is allowed") - - def _is_empty_column_selection(column): """ Return True if the column selection is empty (empty list or all-False diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index ead089780cd3d..2a10ba78d0a2d 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -15,13 +15,12 @@ from scipy.stats.mstats import mquantiles from joblib import Parallel, delayed -from ..compose._column_transformer import _get_column -from ..compose._column_transformer import _get_column_indices - from ..base import is_classifier, is_regressor from ..utils.extmath import cartesian from ..utils import check_array from ..utils import check_matplotlib_support # noqa +from ..utils import safe_indexing +from ..utils import _get_column_indices from ..utils.validation import check_is_fitted from ..tree._tree import DTYPE from ..exceptions import NotFittedError @@ -74,14 +73,14 @@ def _grid_from_X(X, percentiles, grid_resolution): values = [] for feature in range(X.shape[1]): - uniques = np.unique(_get_column(X, feature)) + uniques = np.unique(safe_indexing(X, feature, axis=1)) if uniques.shape[0] < grid_resolution: # feature has low resolution use unique vals axis = uniques else: # create axis based on percentiles and grid resolution emp_percentiles = mquantiles( - _get_column(X, feature), prob=percentiles, axis=0 + safe_indexing(X, feature, axis=1), prob=percentiles, axis=0 ) if np.allclose(emp_percentiles[0], emp_percentiles[1]): raise ValueError( @@ -301,7 +300,7 @@ def partial_dependence(estimator, X, features, response_method='auto', raise ValueError('Multiclass-multioutput estimators are not supported') if not(hasattr(X, '__array__') or sparse.issparse(X)): - X = check_array(X, force_all_finite='allow-nan', dtype=np.object) + return check_array(X, force_all_finite='allow-nan', dtype=np.object) accepted_responses = ('auto', 'predict_proba', 'decision_function') if response_method not in accepted_responses: @@ -348,7 +347,8 @@ def partial_dependence(estimator, X, features, response_method='auto', ).ravel() grid, 
values = _grid_from_X( - _get_column(X, features_indices), percentiles, grid_resolution + safe_indexing(X, features_indices, axis=1), percentiles, + grid_resolution ) if method == 'brute': diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index daf7e7763235d..3c610e94e7b34 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -178,7 +178,44 @@ def axis0_safe_slice(X, mask, len_mask): return np.zeros(shape=(0, X.shape[1])) -def safe_indexing(X, indices): +def safe_indexing(X, indices, axis=0): + """Return rows, items or columns of X using indices. + + Parameters + ---------- + X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series. + Data from which to sample rows, items or columns. + indices : + When ``axis=0``, indices need to be an array of integer. + When ``axis=1``, indices can be one of: + Supported key types (key): + - scalar: output is 1D + - lists, slices, boolean masks: output is 2D + - callable that returns any of the above + + Supported key data types: + + - integer or boolean mask (positional): + - supported for arrays, sparse matrices and dataframes + - string (key-based): + - only supported for dataframes + - So no keys other than strings are allowed (while in principle you + can use any hashable object as key). + axis : int, default=0 + The axis along which the X will be subsampled. ``axis=0`` will select + rows while ``axis=1`` will select columns. + + Notes + ----- + CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are + not supported. + """ + if axis ==0: + return _safe_indexing_row(X, indices) + return _safe_indexing_column(X, indices) + + +def _safe_indexing_row(X, indices): """Return items or rows from X using indices. Allows simple indexing of lists or arrays. @@ -223,6 +260,146 @@ def safe_indexing(X, indices): return [X[idx] for idx in indices] +def _check_key_type(key, superclass): + """ + Check that scalar, list or slice is of a certain type. + + This is only used in _get_column and _get_column_indices to check + if the `key` (column specification) is fully integer or fully string-like. + + Parameters + ---------- + key : scalar, list, slice, array-like + The column specification to check + superclass : int or str + The type for which to check the `key` + + """ + if isinstance(key, superclass): + return True + if isinstance(key, slice): + return (isinstance(key.start, (superclass, type(None))) and + isinstance(key.stop, (superclass, type(None)))) + if isinstance(key, list): + return all(isinstance(x, superclass) for x in key) + if hasattr(key, 'dtype'): + if superclass is int: + return key.dtype.kind == 'i' + else: + # superclass = str + return key.dtype.kind in ('O', 'U', 'S') + return False + + +def _safe_indexing_column(X, key): + """ + Get feature column(s) from input data X. + + Supported input types (X): numpy arrays, sparse arrays and DataFrames + + Supported key types (key): + - scalar: output is 1D + - lists, slices, boolean masks: output is 2D + - callable that returns any of the above + + Supported key data types: + + - integer or boolean mask (positional): + - supported for arrays, sparse matrices and dataframes + - string (key-based): + - only supported for dataframes + - So no keys other than strings are allowed (while in principle you + can use any hashable object as key). 
+ + """ + # check whether we have string column names or integers + if _check_key_type(key, int): + column_names = False + elif _check_key_type(key, str): + column_names = True + elif hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_): + # boolean mask + column_names = False + if hasattr(X, 'loc'): + # pandas boolean masks don't work with iloc, so take loc path + column_names = True + else: + raise ValueError("No valid specification of the columns. Only a " + "scalar, list or slice of all integers or all " + "strings, or boolean mask is allowed") + + if column_names: + if hasattr(X, 'loc'): + # pandas dataframes + return X.loc[:, key] + else: + raise ValueError("Specifying the columns using strings is only " + "supported for pandas DataFrames") + else: + if hasattr(X, 'iloc'): + # pandas dataframes + return X.iloc[:, key] + else: + # numpy arrays, sparse arrays + return X[:, key] + + +def _get_column_indices(X, key): + """ + Get feature column indices for input data X and key. + + For accepted values of `key`, see the docstring of _get_column + + """ + n_columns = X.shape[1] + + if (_check_key_type(key, int) + or hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_)): + # Convert key into positive indexes + try: + idx = np.arange(n_columns)[key] + except IndexError as e: + raise ValueError( + 'all features must be in [0, %d]' % (n_columns - 1) + ) from e + return np.atleast_1d(idx).tolist() + elif _check_key_type(key, str): + try: + all_columns = list(X.columns) + except AttributeError: + raise ValueError("Specifying the columns using strings is only " + "supported for pandas DataFrames") + if isinstance(key, str): + columns = [key] + elif isinstance(key, slice): + start, stop = key.start, key.stop + if start is not None: + start = all_columns.index(start) + if stop is not None: + # pandas indexing with strings is endpoint included + stop = all_columns.index(stop) + 1 + else: + stop = n_columns + 1 + return list(range(n_columns)[slice(start, stop)]) + else: + columns = list(key) + + try: + column_indices = [all_columns.index(col) for col in columns] + except ValueError as e: + if 'not in list' in str(e): + raise ValueError( + "A given feature is not a column of the dataframe" + ) from e + raise + + return column_indices + else: + raise ValueError("No valid specification of the columns. 
Only a " + "scalar, list or slice of all integers or all " + "strings, or boolean mask is allowed") + + def resample(*arrays, **options): """Resample arrays or sparse matrices in a consistent way From 59cb6f5960eb2ad9146efc6e7d9463e3a3edcd69 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 6 Jun 2019 15:29:17 +0200 Subject: [PATCH 05/86] fix --- sklearn/inspection/partial_dependence.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 2a10ba78d0a2d..b4ebb3e3b0139 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -300,7 +300,7 @@ def partial_dependence(estimator, X, features, response_method='auto', raise ValueError('Multiclass-multioutput estimators are not supported') if not(hasattr(X, '__array__') or sparse.issparse(X)): - return check_array(X, force_all_finite='allow-nan', dtype=np.object) + X = check_array(X, force_all_finite='allow-nan', dtype=np.object) accepted_responses = ('auto', 'predict_proba', 'decision_function') if response_method not in accepted_responses: @@ -313,6 +313,7 @@ def partial_dependence(estimator, X, features, response_method='auto', "The response_method parameter is ignored for regressors and " "must be 'auto'." ) + accepted_methods = ('brute', 'recursion', 'auto') if method not in accepted_methods: raise ValueError( From cb4b00b1be286b6ade496465286df2b1f2b5dc98 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 6 Jun 2019 16:38:00 +0200 Subject: [PATCH 06/86] TST check the support of different types for features --- .../tests/test_partial_dependence.py | 23 +++++++++++++++++++ sklearn/utils/__init__.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 5a3b82dc05f27..cad8908591457 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -494,6 +494,29 @@ def test_partial_dependence_dataframe(estimator, preprocessor, features): assert_allclose(values_pipe[1], values_clf[1]) +@pytest.mark.parametrize( + "features", + [0, iris.feature_names[0], + [0, 2], [iris.feature_names[i] for i in (0, 2)], + slice(0, 2, 1), [True, False, True, False]], + ids=['scalar-int', 'scalar-str', 'list-int', 'list-str', 'slice', 'mask'] +) +def test_partial_dependence_feature_type(features): + # check all possible features type supported in PDP + pd = pytest.importorskip("pandas") + df = pd.DataFrame(iris.data, columns=iris.feature_names) + + preprocessor = make_column_transformer( + (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]), + (RobustScaler(), [iris.feature_names[i] for i in (1, 3)]) + ) + pipe = make_pipeline( + preprocessor, LogisticRegression(max_iter=1000, random_state=0) + ) + pipe.fit(df, iris.target) + pdp_pipe, values_pipe = partial_dependence(pipe, df, features=features) + + def test_plot_partial_dependence(pyplot): # Test partial dependence plot function. boston = load_boston() diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 3c610e94e7b34..98ee5af8d9349 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -210,7 +210,7 @@ def safe_indexing(X, indices, axis=0): CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are not supported. 
""" - if axis ==0: + if axis == 0: return _safe_indexing_row(X, indices) return _safe_indexing_column(X, indices) From c04dcba72bc0d0fcadb70200f693a8de0e0b3056 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 18 Jul 2019 15:09:31 +0200 Subject: [PATCH 07/86] problem merge --- sklearn/inspection/tests/test_partial_dependence.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index c4d9f64796cbc..5b0b6a14fcf62 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -30,11 +30,8 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import PolynomialFeatures from sklearn.preprocessing import StandardScaler -<<<<<<< HEAD from sklearn.preprocessing import RobustScaler from sklearn.pipeline import make_pipeline -======= ->>>>>>> origin/master from sklearn.dummy import DummyClassifier from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.utils.testing import assert_allclose From 0326a88aaf67dc7723d7eea97eebdeea7b36f65f Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 19 Jul 2019 16:21:49 +0200 Subject: [PATCH 08/86] PEP8 --- sklearn/inspection/partial_dependence.py | 2 +- sklearn/inspection/tests/test_partial_dependence.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 311fd57d44454..c778c15887015 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -362,7 +362,7 @@ def partial_dependence(estimator, X, features, response_method='auto', grid, values = _grid_from_X( safe_indexing(X, features_indices, axis=1), percentiles, - grid_resolution + grid_resolution ) if method == 'brute': diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 5b0b6a14fcf62..446e28cd4c939 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -27,7 +27,6 @@ from sklearn.cluster import KMeans from sklearn.compose import make_column_transformer from sklearn.metrics import r2_score -from sklearn.pipeline import make_pipeline from sklearn.preprocessing import PolynomialFeatures from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import RobustScaler From 2f0f69049f460195a71e3a102b301de3014c05a6 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 19 Jul 2019 16:51:30 +0200 Subject: [PATCH 09/86] issue merge --- sklearn/utils/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 8cb9a16918ff5..efcaf6865faa5 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -407,6 +407,12 @@ def _get_column_indices(X, key): ) from e raise + return column_indices + else: + raise ValueError("No valid specification of the columns. 
Only a " + "scalar, list or slice of all integers or all " + "strings, or boolean mask is allowed") + def resample(*arrays, **options): """Resample arrays or sparse matrices in a consistent way From 33e655d842c9b11e6dfbb4ce7a7aee3e5c9e5920 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 19 Jul 2019 18:03:44 +0200 Subject: [PATCH 10/86] iter --- sklearn/inspection/partial_dependence.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index c778c15887015..6a9649e1bad16 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -353,8 +353,12 @@ def partial_dependence(estimator, X, features, response_method='auto', "With the 'recursion' method, the response_method must be " "'decision_function'. Got {}.".format(response_method) ) - check_is_fitted(estimator, 'estimators_', - msg="'estimator' parameter must be a fitted estimator") + msg="'estimator' parameter must be a fitted estimator" + if isinstance(estimator, BaseGradientBoosting): + fitted_attribute = 'estimators_' + else: + fitted_attribute = 'n_iter_' + check_is_fitted(estimator, fitted_attribute, msg=msg) features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' From 01947170924b7e6d4b9b26c24f8899cb35487c22 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 22 Jul 2019 14:32:59 +0200 Subject: [PATCH 11/86] fix --- sklearn/inspection/partial_dependence.py | 2 +- sklearn/inspection/tests/test_partial_dependence.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 6a9649e1bad16..38173a5e4c8f6 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -353,7 +353,7 @@ def partial_dependence(estimator, X, features, response_method='auto', "With the 'recursion' method, the response_method must be " "'decision_function'. 
Got {}.".format(response_method) ) - msg="'estimator' parameter must be a fitted estimator" + msg ="'estimator' parameter must be a fitted estimator" if isinstance(estimator, BaseGradientBoosting): fitted_attribute = 'estimators_' else: diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 446e28cd4c939..d494a05f599b7 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -366,7 +366,7 @@ def test_partial_dependence_unknown_feature_string(estimator): estimator.fit(df, y) features = 'random' - err_msg = 'A given feature is not a column of the dataframe' + err_msg = 'A given column is not a column of the dataframe' with pytest.raises(ValueError, match=err_msg): partial_dependence(estimator, df, [features]) From 72ee546fceb1fb3582ea16460d32dc2a8ee515c5 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 22 Jul 2019 14:34:54 +0200 Subject: [PATCH 12/86] PEP8 --- sklearn/inspection/partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 38173a5e4c8f6..4fb6231580053 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -353,7 +353,7 @@ def partial_dependence(estimator, X, features, response_method='auto', "With the 'recursion' method, the response_method must be " "'decision_function'. Got {}.".format(response_method) ) - msg ="'estimator' parameter must be a fitted estimator" + msg = "'estimator' parameter must be a fitted estimator" if isinstance(estimator, BaseGradientBoosting): fitted_attribute = 'estimators_' else: From db25ee6246412086d5045d2e896dca6dece95bfe Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 23 Jul 2019 18:08:22 +0200 Subject: [PATCH 13/86] update docstring --- sklearn/inspection/partial_dependence.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 4fb6231580053..b5228a75eab59 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -186,11 +186,10 @@ def partial_dependence(estimator, X, features, response_method='auto', A fitted estimator object implementing `predict`, `predict_proba`, or `decision_function`. Multioutput-multiclass classifiers are not supported. - X : array-like, shape (n_samples, n_features) + X : array-like or DataFrame, shape (n_samples, n_features) ``X`` is used both to generate a grid of values for the ``features``, and to compute the averaged predictions when method is 'brute'. - # TODO: update the type accepted features : list or array-like of int The target features for which the partial dependency should be computed. @@ -404,7 +403,7 @@ def plot_partial_dependence(estimator, X, features, feature_names=None, A fitted estimator object implementing `predict`, `predict_proba`, or `decision_function`. Multioutput-multiclass classifiers are not supported. - X : array-like, shape (n_samples, n_features) + X : array-like or DataFrame, shape (n_samples, n_features) The data to use to build the grid of values on which the dependence will be evaluated. This is usually the training data. 
features : list of {int, str, pair of int, pair of str} From 60b8f59f12298f25d925d54b222111ae14474025 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 23 Jul 2019 18:51:34 +0200 Subject: [PATCH 14/86] whats new --- doc/whats_new/v0.22.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index f2046cc6b64f1..9e288c5079893 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -168,6 +168,13 @@ Changelog match `spectral_clustering`. :pr:`13726` by :user:`Shuzhe Xiao `. +:mod:`sklearn.inspection` +......................... + +- |Enhancement| :func:`inspection.partial_dependence` accepts pandas DataFrame + and :class:`pipeline.Pipeline` containing :class:`compose.ColumnTransformer`. + :pr:`14028` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.feature_selection` ................................ - |Fix| Fixed a bug where :class:`VarianceThreshold` with `threshold=0` did not From c01385c052a010f138707ad41e593414cfe76133 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 25 Jul 2019 18:20:00 +0200 Subject: [PATCH 15/86] EHN add support for scalar, slice and mask in safe_indexing axis=0 --- doc/whats_new/v0.22.rst | 4 ++ sklearn/utils/__init__.py | 21 +++++++++-- sklearn/utils/tests/test_utils.py | 62 ++++++++++++++++++++++++++----- 3 files changed, 73 insertions(+), 14 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index bcef08ff1881b..d1d838ec99dc5 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -206,6 +206,10 @@ Changelog NumPy array, SciPy sparse matrix, and Pandas DataFrame. :pr:`14035` by `Guillaume Lemaitre `. +- |Enhancement| :func:`utils.safe_indexing` becomes more consistent and support + scalar, slice, mask indexing for both `axis=0` and `axis=1`. + :pr:`xx` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.neural_network` ............................. diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index efcaf6865faa5..2b2b33b0186d4 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1,6 +1,7 @@ """ The :mod:`sklearn.utils` module includes various utilities. """ +from collections.abc import Iterable from collections.abc import Sequence from contextlib import contextmanager from itertools import islice @@ -188,6 +189,10 @@ def safe_indexing(X, indices, axis=0): Data from which to sample rows, items or columns. indices : array-like - When ``axis=0``, indices need to be an array of integer. + - container: lists, slices, boolean masks: output is 2D. + Supported data types for containers: + - integer or boolean (positional): supported for arrays, sparse + and dataframes - When ``axis=1``, indices can be one of: - scalar: output is 1D, unless `X` is sparse. Supported data types for scalars: @@ -247,10 +252,12 @@ def _safe_indexing_row(X, indices): CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are not supported. 
""" - if hasattr(X, "iloc"): - # Work-around for indexing with read-only indices in pandas + if not isinstance(indices, slice): indices = np.asarray(indices) - indices = indices if indices.flags.writeable else indices.copy() + if hasattr(X, "iloc"): + if not isinstance(indices, slice): + # Work-around for indexing with read-only indices in pandas + indices = indices if indices.flags.writeable else indices.copy() # Pandas Dataframes and Series try: return X.iloc[indices] @@ -268,7 +275,13 @@ def _safe_indexing_row(X, indices): else: return X[indices] else: - return [X[idx] for idx in indices] + # In the case of a slice or a scalar + if not isinstance(indices, Iterable) or indices.ndim == 0: + return X[indices] + else: + if np.issubdtype(indices.dtype, np.bool_): + indices = np.flatnonzero(indices) + return [X[idx] for idx in indices] def _check_key_type(key, superclass): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index a39e8160047a5..1601002449924 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -10,6 +10,7 @@ from sklearn.utils.testing import (assert_raises, assert_array_equal, + assert_allclose_dense_sparse, assert_raises_regex, assert_warns_message, assert_no_warnings) from sklearn.utils import check_random_state @@ -219,14 +220,51 @@ def test_check_key_type(key, clazz, is_expected_type): assert _check_key_type(key, clazz) is is_expected_type -@pytest.mark.parametrize("asarray", [True, False], ids=["array-like", "array"]) -def test_safe_indexing_axis_0(asarray): +@pytest.mark.parametrize( + "idx", + [[0, 2], [True, False, True], # array-like + np.array([0, 2]), np.array([True, False, True])], # numpy array + ids=['list-indices', 'list-mask', 'array-indices', 'array-mask'] +) +@pytest.mark.parametrize( + "array_type", [None, np.asarray, sp.csr_matrix], + ids=["list", "array", "sparse"] +) +def test_safe_indexing_axis_0_container(idx, array_type): X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - inds = np.array([1, 2]) if asarray else [1, 2] - X_inds = safe_indexing(X, inds) - X_arrays = safe_indexing(np.array(X), inds) - assert_array_equal(np.array(X_inds), X_arrays) - assert_array_equal(np.array(X_inds), np.array(X)[inds]) + X = array_type(X) if array_type is not None else X + X_subset = safe_indexing(X, idx, axis=0) + X_expect = [[1, 2, 3], [7, 8, 9]] + X_expect = array_type(X_expect) if array_type is not None else X_expect + assert_allclose_dense_sparse(X_subset, X_expect) + + +@pytest.mark.parametrize( + "array_type", [None, np.asarray, sp.csr_matrix], + ids=["list", "array", "sparse"] +) +def test_safe_indexing_axis_0_slice(array_type): + X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + X = array_type(X) if array_type is not None else X + idx = slice(0, 2) + X_subset = safe_indexing(X, idx, axis=0) + X_expect = [[1, 2, 3], [4, 5, 6]] + X_expect = array_type(X_expect) if array_type is not None else X_expect + assert_allclose_dense_sparse(X_subset, X_expect) + + +@pytest.mark.parametrize( + "array_type", [None, np.asarray, sp.csr_matrix], + ids=["list", "array", "sparse"] +) +def test_safe_indexing_axis_0_scalar(array_type): + X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + X = array_type(X) if array_type is not None else X + idx = 1 # scalar indexing + X_subset = safe_indexing(X, idx, axis=0) + X_expect = [4, 5, 6] + X_expect = array_type(X_expect) if array_type is not None else X_expect + assert_allclose_dense_sparse(X_subset, X_expect) @pytest.mark.parametrize("idx", [0, [0, 1]], ids=['scalar', 'list']) @@ -355,11 +393,15 @@ def 
test_safe_indexing_pandas_series(idx, asarray): assert_array_equal(safe_indexing(serie, idx).values, [0, 1]) -@pytest.mark.parametrize("asarray", [True, False], ids=["array-like", "array"]) -def test_safe_indexing_mock_pandas(asarray): +@pytest.mark.parametrize( + "inds", + [[1, 2], [False, True, True], + np.array([1, 2]), np.array([False, True, True]), + slice(1, None)] +) +def test_safe_indexing_mock_pandas(inds): X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) X_df = MockDataFrame(X) - inds = np.array([1, 2]) if asarray else [1, 2] X_df_indexed = safe_indexing(X_df, inds) X_indexed = safe_indexing(X_df, inds) assert_array_equal(np.array(X_df_indexed), X_indexed) From 0e5c03738557804fcccc9a4c1746a38714529139 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 25 Jul 2019 18:29:37 +0200 Subject: [PATCH 16/86] DOC --- doc/whats_new/v0.22.rst | 2 +- sklearn/utils/__init__.py | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index d1d838ec99dc5..716a500ebe085 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -208,7 +208,7 @@ Changelog - |Enhancement| :func:`utils.safe_indexing` becomes more consistent and support scalar, slice, mask indexing for both `axis=0` and `axis=1`. - :pr:`xx` by :user:`Guillaume Lemaitre `. + :pr:`14475` by :user:`Guillaume Lemaitre `. :mod:`sklearn.neural_network` ............................. diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 2b2b33b0186d4..b1bb933efd140 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -188,23 +188,18 @@ def safe_indexing(X, indices, axis=0): X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series Data from which to sample rows, items or columns. indices : array-like - - When ``axis=0``, indices need to be an array of integer. - - container: lists, slices, boolean masks: output is 2D. - Supported data types for containers: - - integer or boolean (positional): supported for arrays, sparse - and dataframes - - When ``axis=1``, indices can be one of: + - For both `axis=0` and `axis=1`, indices can be one of: - scalar: output is 1D, unless `X` is sparse. Supported data types for scalars: - integer: supported for arrays, sparse matrices and dataframes. - - string (key-based): only supported for dataframes. - container: lists, slices, boolean masks: output is 2D. Supported data types for containers: - - integer or boolean (positional): supported for - arrays, sparse matrices and dataframes - - string (key-based): only supported for dataframes. No keys - other than strings are allowed. + - integer or boolean (positional): supported for arrays, sparse + and dataframes + - If `X` is a dataframe and `axis=1`, indices support string data type + (key-based) as a scalar or a container. The output dimension will be + identical to the above case. axis : int, default=0 The axis along which `X` will be subsampled. ``axis=0`` will select rows while ``axis=1`` will select columns. 
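
A minimal usage sketch of the ``axis`` parameter that the patches above add to
safe_indexing (the array values below are illustrative, not from the tests):

    import numpy as np
    from sklearn.utils import safe_indexing

    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    rows = safe_indexing(X, [0, 2], axis=0)               # select rows 0 and 2
    cols = safe_indexing(X, [True, False, True], axis=1)  # boolean column mask
    col = safe_indexing(X, 1, axis=1)                     # single column, 1D output
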
From f5e08c4272fe1633f490ec15ff5381c5f5b3caef Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 25 Jul 2019 22:20:08 +0200 Subject: [PATCH 17/86] FIX behaviour when passing None --- sklearn/utils/__init__.py | 4 +++- sklearn/utils/tests/test_utils.py | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index b1bb933efd140..e0f5be6dc35c0 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -247,7 +247,9 @@ def _safe_indexing_row(X, indices): CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are not supported. """ - if not isinstance(indices, slice): + if indices is None: + return X + elif not isinstance(indices, slice): indices = np.asarray(indices) if hasattr(X, "iloc"): if not isinstance(indices, slice): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 1601002449924..737a20221b346 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -267,6 +267,17 @@ def test_safe_indexing_axis_0_scalar(array_type): assert_allclose_dense_sparse(X_subset, X_expect) +@pytest.mark.parametrize( + "array_type", [None, np.asarray, sp.csr_matrix], + ids=["list", "array", "sparse"] +) +def test_safe_indexing_axis_0_None(array_type): + X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] + X = array_type(X) if array_type is not None else X + X_subset = safe_indexing(X, None, axis=0) + assert_allclose_dense_sparse(X_subset, X) + + @pytest.mark.parametrize("idx", [0, [0, 1]], ids=['scalar', 'list']) @pytest.mark.parametrize("asarray", [True, False], ids=["array-like", "array"]) def test_safe_indexing_axis_1_sparse(idx, asarray): @@ -395,9 +406,11 @@ def test_safe_indexing_pandas_series(idx, asarray): @pytest.mark.parametrize( "inds", - [[1, 2], [False, True, True], + [None, + [1, 2], [False, True, True], np.array([1, 2]), np.array([False, True, True]), - slice(1, None)] + slice(1, None)], + ids=['None', 'list-int', 'list-mask', 'array-int', 'array-mask', 'slice'] ) def test_safe_indexing_mock_pandas(inds): X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) From bb4db91755cf724a0e856ef9dab8bcccdecca686 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 25 Jul 2019 22:21:53 +0200 Subject: [PATCH 18/86] PEP8 --- sklearn/utils/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 737a20221b346..468f7dae4e2e8 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -410,7 +410,7 @@ def test_safe_indexing_pandas_series(idx, asarray): [1, 2], [False, True, True], np.array([1, 2]), np.array([False, True, True]), slice(1, None)], - ids=['None', 'list-int', 'list-mask', 'array-int', 'array-mask', 'slice'] + ids=['None', 'list-int', 'list-mask', 'array-int', 'array-mask', 'slice'] ) def test_safe_indexing_mock_pandas(inds): X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) From 8cd74db2fba750c17c6b4dfe066e411dd30c43b6 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 29 Jul 2019 12:05:25 +0200 Subject: [PATCH 19/86] address thomas comments --- sklearn/utils/__init__.py | 2 +- sklearn/utils/tests/test_utils.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index b1bb933efd140..e06cf0dd2838b 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -196,7 +196,7 @@ def 
safe_indexing(X, indices, axis=0): - container: lists, slices, boolean masks: output is 2D. Supported data types for containers: - integer or boolean (positional): supported for arrays, sparse - and dataframes + matrices and dataframes - If `X` is a dataframe and `axis=1`, indices support string data type (key-based) as a scalar or a container. The output dimension will be identical to the above case. diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 1601002449924..897a95542e33d 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -240,12 +240,12 @@ def test_safe_indexing_axis_0_container(idx, array_type): @pytest.mark.parametrize( - "array_type", [None, np.asarray, sp.csr_matrix], + "array_type", [list, np.asarray, sp.csr_matrix], ids=["list", "array", "sparse"] ) def test_safe_indexing_axis_0_slice(array_type): X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - X = array_type(X) if array_type is not None else X + X = array_type(X) idx = slice(0, 2) X_subset = safe_indexing(X, idx, axis=0) X_expect = [[1, 2, 3], [4, 5, 6]] @@ -254,12 +254,12 @@ def test_safe_indexing_axis_0_slice(array_type): @pytest.mark.parametrize( - "array_type", [None, np.asarray, sp.csr_matrix], + "array_type", [list, np.asarray, sp.csr_matrix], ids=["list", "array", "sparse"] ) def test_safe_indexing_axis_0_scalar(array_type): X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - X = array_type(X) if array_type is not None else X + X = array_type(X) idx = 1 # scalar indexing X_subset = safe_indexing(X, idx, axis=0) X_expect = [4, 5, 6] From 075dd80f408c3b38acd6464c42a5874c1bb33089 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 29 Jul 2019 14:53:40 +0200 Subject: [PATCH 20/86] debug --- build_tools/azure/test_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh index 6e27915e0be6b..27a3e4b649a13 100755 --- a/build_tools/azure/test_script.sh +++ b/build_tools/azure/test_script.sh @@ -21,7 +21,7 @@ except ImportError: python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" pip list -TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML" +TEST_CMD="python -m pytest --verbose --showlocals --durations=20 --junitxml=$JUNITXML" if [[ "$COVERAGE" == "true" ]]; then export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc" From d0f8d60a8654747f2f5600e3fae9c071c1c417aa Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 29 Jul 2019 18:31:25 +0200 Subject: [PATCH 21/86] FIX change boolean array-likes indexing in old NumPy version --- sklearn/compose/tests/test_column_transformer.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index ae7ef31d6c7f1..2ccfd6d6c2eae 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -16,6 +16,7 @@ from sklearn.base import BaseEstimator from sklearn.compose import ColumnTransformer, make_column_transformer from sklearn.exceptions import NotFittedError +from sklearn.preprocessing import FunctionTransformer from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder from sklearn.feature_extraction import DictVectorizer @@ -1108,3 +1109,14 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname): err_msg = 'Specifying the columns' with pytest.raises(ValueError, 
match=err_msg): tf.transform(X_array) + + +def test_column_transformer_mask_indexing(): + # Regression test for #xxxxx + # Boolean mask indexing with NumPy < 1.13 + X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]]) + column_transformer = ColumnTransformer( + [('identity', FunctionTransformer(), [False, True, False, True])] + ) + X_trans = column_transformer.fit_transform(X) + assert X_trans.shape == (3, 2) From f95a228e5444d801b6ab09d30dcc655d347cc663 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 29 Jul 2019 19:12:10 +0200 Subject: [PATCH 22/86] change indexing --- sklearn/utils/__init__.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index efcaf6865faa5..8db41bb27986d 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -18,6 +18,7 @@ from . import _joblib from ..exceptions import DataConversionWarning from .deprecation import deprecated +from .fixes import np_version from .validation import (as_float_array, assert_all_finite, check_random_state, column_or_1d, check_array, @@ -225,6 +226,17 @@ def safe_indexing(X, indices, axis=0): ) +# FIXME: to be removed once NumPy 1.13 is the minimum version required +def _array_indexing(array, key, axis=0): + """Index an array consistently across NumPy version.""" + if np_version < (1, 13): + # check if we have an boolean array-likes to make the proper indexing + key_array = np.asarray(key) + if np.issubdtype(key_array.dtype, np.bool_): + key = key_array + return array[key] if axis == 0 else array[:, key] + + def _safe_indexing_row(X, indices): """Return items or rows from X using indices. @@ -266,7 +278,7 @@ def _safe_indexing_row(X, indices): # This is often substantially faster than X[indices] return X.take(indices, axis=0) else: - return X[indices] + return _array_indexing(X, indices, axis=0) else: return [X[idx] for idx in indices] @@ -356,7 +368,7 @@ def _safe_indexing_column(X, key): return X.iloc[:, key] else: # numpy arrays, sparse arrays - return X[:, key] + return _array_indexing(X, key, axis=1) def _get_column_indices(X, key): @@ -371,7 +383,7 @@ def _get_column_indices(X, key): or hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_)): # Convert key into positive indexes try: - idx = np.arange(n_columns)[key] + idx = safe_indexing(np.arange(n_columns), key) except IndexError as e: raise ValueError( 'all features must be in [0, %d]' % (n_columns - 1) From 1c8180390799d22bf42b5c1673caf0cb3dd71c79 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 30 Jul 2019 09:54:22 +0200 Subject: [PATCH 23/86] add regression test in utils --- doc/whats_new/v0.22.rst | 8 +++++++ .../compose/tests/test_column_transformer.py | 4 ++-- sklearn/utils/tests/test_utils.py | 22 +++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 93635d88069d5..114afb9185a18 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -61,6 +61,14 @@ Changelog `sample_weights` are not supported by the wrapped estimator). :pr:`13575` by :user:`William de Vazelhes `. +:mod:`sklearn.compose` +...................... + +- |Fix| Fixed a bug in :class:`compose.ColumnTransformer` which failed to + select the proper columns when using a boolean list and NumPy older than + 1.13. + :pr:`14510` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.datasets` ....................... 
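A rough sketch of the issue behind the changelog entry above (toy data, not part of the patch): with NumPy older than 1.13, indexing with a plain Python list of booleans is not treated as a mask (it is interpreted as the integer indices 0 and 1), so converting the key to a boolean array first restores the intended column selection:

    import numpy as np

    X = np.arange(12).reshape(3, 4)
    mask = [False, True, False, True]
    # X[:, mask] picks columns [0, 1, 0, 1] on NumPy < 1.13 instead of masking
    X[:, np.asarray(mask)]   # always a mask selection -> columns 1 and 3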
diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 2ccfd6d6c2eae..a9c4fd9e25fbe 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -1112,8 +1112,8 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname): def test_column_transformer_mask_indexing(): - # Regression test for #xxxxx - # Boolean mask indexing with NumPy < 1.13 + # Regression test for #14510 + # Boolean array-like does not behave as boolean array with NumPy < 1.13 X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]]) column_transformer = ColumnTransformer( [('identity', FunctionTransformer(), [False, True, False, True])] diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index a39e8160047a5..35cfde4aaef7d 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -10,6 +10,7 @@ from sklearn.utils.testing import (assert_raises, assert_array_equal, + assert_allclose_dense_sparse, assert_raises_regex, assert_warns_message, assert_no_warnings) from sklearn.utils import check_random_state @@ -365,6 +366,27 @@ def test_safe_indexing_mock_pandas(asarray): assert_array_equal(np.array(X_df_indexed), X_indexed) +@pytest.mark.parametrize("array_type", ['array', 'sparse', 'dataframe']) +def test_safe_indexing_mask_axis_1(array_type): + # regression test for #14510 + # check that boolean array-like and boolean array lead to the same indexing + # even in NumPy < 1.13 + if array_type == 'array': + array_constructor = np.asarray + elif array_type == 'sparse': + array_constructor = sp.csr_matrix + elif array_type == 'dataframe': + pd = pytest.importorskip('pandas') + array_constructor = pd.DataFrame + + X = array_constructor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + mask = [True, False, True] + mask_array = np.array(mask) + X_masked = safe_indexing(X, mask, axis=1) + X_masked_array = safe_indexing(X, mask_array, axis=1) + assert_allclose_dense_sparse(X_masked, X_masked_array) + + def test_shuffle_on_ndim_equals_three(): def to_tuple(A): # to make the inner arrays hashable return tuple(tuple(tuple(C) for C in B) for B in A) From c8009a28aa2855e8e01cfebd3ef5df337c0536f4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 30 Jul 2019 12:17:43 +0200 Subject: [PATCH 24/86] fix --- sklearn/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 8db41bb27986d..ac6446afcd6cf 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -229,7 +229,7 @@ def safe_indexing(X, indices, axis=0): # FIXME: to be removed once NumPy 1.13 is the minimum version required def _array_indexing(array, key, axis=0): """Index an array consistently across NumPy version.""" - if np_version < (1, 13): + if np_version < (1, 13) or issparse(array): # check if we have an boolean array-likes to make the proper indexing key_array = np.asarray(key) if np.issubdtype(key_array.dtype, np.bool_): From a80b33d7a60d67706150d48f71b3d0837fceba38 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 30 Jul 2019 14:25:31 +0200 Subject: [PATCH 25/86] add test in column transformer --- sklearn/compose/tests/test_column_transformer.py | 5 ++++- sklearn/utils/__init__.py | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 
a9c4fd9e25fbe..a667b35cf65e3 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -1111,10 +1111,13 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname): tf.transform(X_array) -def test_column_transformer_mask_indexing(): +@pytest.mark.parametrize("array_type", [np.asarray, sparse.csr_matrix]) +def test_column_transformer_mask_indexing(array_type): # Regression test for #14510 # Boolean array-like does not behave as boolean array with NumPy < 1.13 + # and sparse matrices as well X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]]) + X = array_type(X) column_transformer = ColumnTransformer( [('identity', FunctionTransformer(), [False, True, False, True])] ) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index ac6446afcd6cf..83f4d7fd1876c 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -226,7 +226,6 @@ def safe_indexing(X, indices, axis=0): ) -# FIXME: to be removed once NumPy 1.13 is the minimum version required def _array_indexing(array, key, axis=0): """Index an array consistently across NumPy version.""" if np_version < (1, 13) or issparse(array): From 9fb045dcf1b7923bf06021b6944ca0cb3dd8ad40 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 14:22:46 +0200 Subject: [PATCH 26/86] raise error if axis not 0 or 1 --- sklearn/utils/__init__.py | 5 +++++ sklearn/utils/tests/test_utils.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 83f4d7fd1876c..3b4a20d08716b 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -228,6 +228,11 @@ def safe_indexing(X, indices, axis=0): def _array_indexing(array, key, axis=0): """Index an array consistently across NumPy version.""" + if axis not in (0, 1): + raise ValueError( + "'axis' should be either 0 (to index rows) or 1 (to index " + " column). 
Got {} instead.".format(axis) + ) if np_version < (1, 13) or issparse(array): # check if we have an boolean array-likes to make the proper indexing key_array = np.asarray(key) diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 35cfde4aaef7d..49f50eedc0a42 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -13,6 +13,7 @@ assert_allclose_dense_sparse, assert_raises_regex, assert_warns_message, assert_no_warnings) +from sklearn.utils import _array_indexing from sklearn.utils import check_random_state from sklearn.utils import _check_key_type from sklearn.utils import deprecated @@ -387,6 +388,13 @@ def test_safe_indexing_mask_axis_1(array_type): assert_allclose_dense_sparse(X_masked, X_masked_array) +def test_array_indexing_array_error(): + X = np.array([[0, 1], [2, 3]]) + mask = [True, False] + with pytest.raises(ValueError, match="'axis' should be either 0"): + _array_indexing(X, mask, axis=3) + + def test_shuffle_on_ndim_equals_three(): def to_tuple(A): # to make the inner arrays hashable return tuple(tuple(tuple(C) for C in B) for B in A) From 5dcf34f3979c365f99b94b4f4a116c2ec4ed18b2 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 14:40:02 +0200 Subject: [PATCH 27/86] itert --- sklearn/utils/__init__.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index c41325c03d307..901ad3561dd5c 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -268,10 +268,10 @@ def _safe_indexing_row(X, indices): elif not isinstance(indices, slice): indices = np.asarray(indices) if hasattr(X, "iloc"): + # Pandas Dataframes and Series if not isinstance(indices, slice): # Work-around for indexing with read-only indices in pandas indices = indices if indices.flags.writeable else indices.copy() - # Pandas Dataframes and Series try: return X.iloc[indices] except ValueError: @@ -281,20 +281,14 @@ def _safe_indexing_row(X, indices): DataConversionWarning) return X.copy().iloc[indices] elif hasattr(X, "shape"): - if hasattr(X, 'take') and (hasattr(indices, 'dtype') and - indices.dtype.kind == 'i'): - # This is often substantially faster than X[indices] - return X.take(indices, axis=0) - else: - return _array_indexing(X, indices, axis=0) - else: + return _array_indexing(X, indices, axis=0) + elif not isinstance(indices, Iterable) or indices.ndim == 0: # In the case of a slice or a scalar - if not isinstance(indices, Iterable) or indices.ndim == 0: - return X[indices] - else: - if np.issubdtype(indices.dtype, np.bool_): - indices = np.flatnonzero(indices) - return [X[idx] for idx in indices] + return X[indices] + else: + if np.issubdtype(indices.dtype, np.bool_): + indices = np.flatnonzero(indices) + return [X[idx] for idx in indices] def _check_key_type(key, superclass): From 70f0e023b76558bad6226aa92a25c1024870e839 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 16:52:10 +0200 Subject: [PATCH 28/86] iter --- sklearn/utils/__init__.py | 68 +++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 901ad3561dd5c..cad6ecaa64f80 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -4,6 +4,7 @@ from collections.abc import Iterable from collections.abc import Sequence from contextlib import contextmanager +from itertools import compress from itertools import islice 
import numbers import platform @@ -188,19 +189,17 @@ def safe_indexing(X, indices, axis=0): ---------- X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series Data from which to sample rows, items or columns. - indices : array-like - - For both `axis=0` and `axis=1`, indices can be one of: - - scalar: output is 1D, unless `X` is sparse. - Supported data types for scalars: - - integer: supported for arrays, sparse matrices and - dataframes. - - container: lists, slices, boolean masks: output is 2D. - Supported data types for containers: - - integer or boolean (positional): supported for arrays, sparse - matrices and dataframes - - If `X` is a dataframe and `axis=1`, indices support string data type - (key-based) as a scalar or a container. The output dimension will be - identical to the above case. + indices : bool, int, str, array-like + - To select a single element (i.e. row or column), `indices` can be one + of the following: `bool` or `int` which are supported by all types of + `X`. `indices` being a `str` is only supported for `X` being a + dataframe. The selected subset will be 1D, unless `X` is a sparse + matrix in which case it will be 2D. + - To select multiple elements (i.e. rows or columns), `indices` can be + one of the following: `list`, `array`, `slice`. The type used in + these containers can be one of the following: `int`, `bool`, and + `str`. `str` is only supported when `X` is a dataframe. + The selected subset will be 2D. axis : int, default=0 The axis along which `X` will be subsampled. ``axis=0`` will select rows while ``axis=1`` will select columns. @@ -241,17 +240,18 @@ def _array_indexing(array, key, axis=0): return array[key] if axis == 0 else array[:, key] -def _safe_indexing_row(X, indices): +def _safe_indexing_row(X, key): """Return items or rows from X using indices. - Allows simple indexing of lists or arrays. + Allows simple indexing of lists, NumPy array, SciPy sparse matrices, and + Pandas DataFrame`. Parameters ---------- X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series Data from which to sample rows or items. - indices : array-like of int - Indices according to which X will be subsampled. + key : int, slice, bool or int array-like + Key used to get a subset of X. Returns ------- @@ -263,32 +263,36 @@ def _safe_indexing_row(X, indices): CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are not supported. """ - if indices is None: + if key is None: return X - elif not isinstance(indices, slice): - indices = np.asarray(indices) if hasattr(X, "iloc"): # Pandas Dataframes and Series - if not isinstance(indices, slice): - # Work-around for indexing with read-only indices in pandas - indices = indices if indices.flags.writeable else indices.copy() + if hasattr(key, 'flags'): + # Work-around for indexing with read-only key in pandas + key = key if key.flags.writeable else key.copy() try: - return X.iloc[indices] + return X.iloc[key] except ValueError: # Cython typed memoryviews internally used in pandas do not support # readonly buffers. 
warnings.warn("Copying input dataframe for slicing.", DataConversionWarning) - return X.copy().iloc[indices] + return X.copy().iloc[key] elif hasattr(X, "shape"): - return _array_indexing(X, indices, axis=0) - elif not isinstance(indices, Iterable) or indices.ndim == 0: - # In the case of a slice or a scalar - return X[indices] + # NumPy array and SciPy sparse matrix + return _array_indexing(X, key, axis=0) else: - if np.issubdtype(indices.dtype, np.bool_): - indices = np.flatnonzero(indices) - return [X[idx] for idx in indices] + # Python list + if not isinstance(key, Iterable) or isinstance(indexable, slice): + # key being a slice or a scalar + return X[key] + key_set = set(key) + if len(key_set) == 2 and all(isinstance(k, (bool, np.bool_)) + for k in key_set): + # key is a boolean array-like + return list(compress(X, key)) + # key is a integer array-like of key + return [X[idx] for idx in key] def _check_key_type(key, superclass): From 7127b5a9cf2efcc01ac8c9028111d74a33fe1088 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 18:31:00 +0200 Subject: [PATCH 29/86] refactor --- sklearn/utils/__init__.py | 202 +++++++++++------------------- sklearn/utils/tests/test_utils.py | 7 -- 2 files changed, 71 insertions(+), 138 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index cad6ecaa64f80..5a41c4a8bd298 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -182,56 +182,8 @@ def axis0_safe_slice(X, mask, len_mask): return np.zeros(shape=(0, X.shape[1])) -def safe_indexing(X, indices, axis=0): - """Return rows, items or columns of X using indices. - - Parameters - ---------- - X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series - Data from which to sample rows, items or columns. - indices : bool, int, str, array-like - - To select a single element (i.e. row or column), `indices` can be one - of the following: `bool` or `int` which are supported by all types of - `X`. `indices` being a `str` is only supported for `X` being a - dataframe. The selected subset will be 1D, unless `X` is a sparse - matrix in which case it will be 2D. - - To select multiple elements (i.e. rows or columns), `indices` can be - one of the following: `list`, `array`, `slice`. The type used in - these containers can be one of the following: `int`, `bool`, and - `str`. `str` is only supported when `X` is a dataframe. - The selected subset will be 2D. - axis : int, default=0 - The axis along which `X` will be subsampled. ``axis=0`` will select - rows while ``axis=1`` will select columns. - - Returns - ------- - subset - Subset of X on axis 0 or 1. - - Notes - ----- - CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are - not supported. - """ - if axis == 0: - return _safe_indexing_row(X, indices) - elif axis == 1: - return _safe_indexing_column(X, indices) - else: - raise ValueError( - "'axis' should be either 0 (to index rows) or 1 (to index " - " column). Got {} instead.".format(axis) - ) - - -def _array_indexing(array, key, axis=0): +def _array_indexing(array, key, axis): """Index an array consistently across NumPy version.""" - if axis not in (0, 1): - raise ValueError( - "'axis' should be either 0 (to index rows) or 1 (to index " - " column). 
Got {} instead.".format(axis) - ) if np_version < (1, 13) or issparse(array): # check if we have an boolean array-likes to make the proper indexing key_array = np.asarray(key) @@ -240,48 +192,18 @@ def _array_indexing(array, key, axis=0): return array[key] if axis == 0 else array[:, key] -def _safe_indexing_row(X, key): - """Return items or rows from X using indices. +def _pandas_indexing(X, key, axis, by_name): + """Index a pandas dataframe or a series.""" + if hasattr(key, 'flags'): + # Work-around for indexing with read-only key in pandas + key = key if key.flags.writeable else key.copy() + indexer = 'loc' if by_name else 'iloc' + return getattr(X, indexer)[:, key] if axis else getattr(X, indexer)[key] - Allows simple indexing of lists, NumPy array, SciPy sparse matrices, and - Pandas DataFrame`. - Parameters - ---------- - X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series - Data from which to sample rows or items. - key : int, slice, bool or int array-like - Key used to get a subset of X. - - Returns - ------- - subset - Subset of X on first axis. - - Notes - ----- - CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are - not supported. - """ - if key is None: - return X - if hasattr(X, "iloc"): - # Pandas Dataframes and Series - if hasattr(key, 'flags'): - # Work-around for indexing with read-only key in pandas - key = key if key.flags.writeable else key.copy() - try: - return X.iloc[key] - except ValueError: - # Cython typed memoryviews internally used in pandas do not support - # readonly buffers. - warnings.warn("Copying input dataframe for slicing.", - DataConversionWarning) - return X.copy().iloc[key] - elif hasattr(X, "shape"): - # NumPy array and SciPy sparse matrix - return _array_indexing(X, key, axis=0) - else: +def _list_indexing(X, key, axis): + """Index a Python list.""" + if axis == 0: # Python list if not isinstance(key, Iterable) or isinstance(indexable, slice): # key being a slice or a scalar @@ -315,7 +237,7 @@ def _check_key_type(key, superclass): return (isinstance(key.start, (superclass, type(None))) and isinstance(key.stop, (superclass, type(None)))) if isinstance(key, list): - return all(isinstance(x, superclass) for x in key) + return all(isinstance(x, superclass) for x in set(key)) if hasattr(key, 'dtype'): if superclass is int: return key.dtype.kind == 'i' @@ -327,60 +249,78 @@ def _check_key_type(key, superclass): return False -def _safe_indexing_column(X, key): - """Get feature column(s) from input data X. +def safe_indexing(X, indices, axis=0): + """Return rows, items or columns of X using indices. - Supported input types (X): numpy arrays, sparse arrays and DataFrames. + Parameters + ---------- + X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series + Data from which to sample rows, items or columns. + indices : bool, int, str, array-like + - To select a single element (i.e. row or column), `indices` can be one + of the following: `bool` or `int` which are supported by all types of + `X`. `indices` being a `str` is only supported for `X` being a + dataframe. The selected subset will be 1D, unless `X` is a sparse + matrix in which case it will be 2D. + - To select multiple elements (i.e. rows or columns), `indices` can be + one of the following: `list`, `array`, `slice`. The type used in + these containers can be one of the following: `int`, `bool`, and + `str`. `str` is only supported when `X` is a dataframe. + The selected subset will be 2D. 
+ axis : int, default=0 + The axis along which `X` will be subsampled. ``axis=0`` will select + rows while ``axis=1`` will select columns. - Supported key types (key): - - scalar: output is 1D; - - lists, slices, boolean masks: output is 2D. + Returns + ------- + subset + Subset of X on axis 0 or 1. - Supported key data types: - - integer or boolean mask (positional): - - supported for arrays, sparse matrices and dataframes. - - string (key-based): - - only supported for dataframes; - - So no keys other than strings are allowed (while in principle you - can use any hashable object as key). + Notes + ----- + CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are + not supported. """ - # check that X is a 2D structure - if X.ndim != 2: - raise ValueError( - "'X' should be a 2D NumPy array, 2D sparse matrix or pandas " - "dataframe when indexing the columns (i.e. 'axis=1'). " - "Got {} instead with {} dimension(s).".format(type(X), X.ndim) - ) - # check whether we have string column names or integers - if _check_key_type(key, int): - column_names = False - elif _check_key_type(key, str): - column_names = True - elif hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_): + if indices is None: + return X + if _check_key_type(indices, int): + by_name = False + elif _check_key_type(indices, str): + by_name = True + elif _check_key_type(indices, bool): # boolean mask - column_names = False + by_name = False if hasattr(X, 'loc'): # pandas boolean masks don't work with iloc, so take loc path - column_names = True + by_name = True else: raise ValueError("No valid specification of the columns. Only a " "scalar, list or slice of all integers or all " "strings, or boolean mask is allowed") - if column_names: - if hasattr(X, 'loc'): - # pandas dataframes - return X.loc[:, key] - else: - raise ValueError("Specifying the columns using strings is only " - "supported for pandas DataFrames") + if axis not in (0, 1): + raise ValueError( + "'axis' should be either 0 (to index rows) or 1 (to index " + " column). Got {} instead.".format(axis) + ) + + if axis == 1 and X.ndim != 2: + raise ValueError( + "'X' should be a 2D NumPy array, 2D sparse matrix or pandas " + "dataframe when indexing the columns (i.e. 'axis=1'). 
" + "Got {} instead with {} dimension(s).".format(type(X), X.ndim) + ) + + if by_name and not hasattr(X, 'loc'): + raise ValueError("Specifying the columns using strings is only " + "supported for pandas DataFrames") + + if hasattr(X, "iloc"): + return _pandas_indexing(X, indices, axis=axis, by_name=by_name) + elif hasattr(X, "shape"): + return _array_indexing(X, indices, axis=axis) else: - if hasattr(X, 'iloc'): - # pandas dataframes - return X.iloc[:, key] - else: - # numpy arrays, sparse arrays - return _array_indexing(X, key, axis=1) + return _list_indexing(X, indices, axis=axis) def _get_column_indices(X, key): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index da7306c1f2a1b..73b342d0e5e50 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -442,13 +442,6 @@ def test_safe_indexing_mask_axis_1(array_type): assert_allclose_dense_sparse(X_masked, X_masked_array) -def test_array_indexing_array_error(): - X = np.array([[0, 1], [2, 3]]) - mask = [True, False] - with pytest.raises(ValueError, match="'axis' should be either 0"): - _array_indexing(X, mask, axis=3) - - def test_shuffle_on_ndim_equals_three(): def to_tuple(A): # to make the inner arrays hashable return tuple(tuple(tuple(C) for C in B) for B in A) From 2f96882bbf9619c4c5cc4b08de12dec7590e1251 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 18:39:02 +0200 Subject: [PATCH 30/86] PEP8 comments --- sklearn/utils/__init__.py | 6 +++--- sklearn/utils/tests/test_utils.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 5a41c4a8bd298..47b704a59d12d 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -256,7 +256,7 @@ def safe_indexing(X, indices, axis=0): ---------- X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series Data from which to sample rows, items or columns. - indices : bool, int, str, array-like + indices : bool, int, str, slice, array-like - To select a single element (i.e. row or column), `indices` can be one of the following: `bool` or `int` which are supported by all types of `X`. `indices` being a `str` is only supported for `X` being a @@ -264,8 +264,8 @@ def safe_indexing(X, indices, axis=0): matrix in which case it will be 2D. - To select multiple elements (i.e. rows or columns), `indices` can be one of the following: `list`, `array`, `slice`. The type used in - these containers can be one of the following: `int`, `bool`, and - `str`. `str` is only supported when `X` is a dataframe. + these containers can be one of the following: `int` and `str`. + However, `str` is only supported when `X` is a dataframe. The selected subset will be 2D. axis : int, default=0 The axis along which `X` will be subsampled. 
``axis=0`` will select diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 73b342d0e5e50..6ebc378890bf3 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -13,7 +13,6 @@ assert_allclose_dense_sparse, assert_raises_regex, assert_warns_message, assert_no_warnings) -from sklearn.utils import _array_indexing from sklearn.utils import check_random_state from sklearn.utils import _check_key_type from sklearn.utils import deprecated From 619fb0526c5b905e31ab11da580c197b505690ac Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 18:41:00 +0200 Subject: [PATCH 31/86] iter --- sklearn/utils/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 47b704a59d12d..29e1b324ea9c6 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -255,7 +255,8 @@ def safe_indexing(X, indices, axis=0): Parameters ---------- X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series - Data from which to sample rows, items or columns. + Data from which to sample rows, items or columns. `list` are only + supported when `axis=0`. indices : bool, int, str, slice, array-like - To select a single element (i.e. row or column), `indices` can be one of the following: `bool` or `int` which are supported by all types of From b7539bd75ea5bd8276621b16aa4f15165e66a71a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 18:46:55 +0200 Subject: [PATCH 32/86] style --- sklearn/utils/__init__.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 29e1b324ea9c6..9e6396e7b5ba1 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -201,20 +201,18 @@ def _pandas_indexing(X, key, axis, by_name): return getattr(X, indexer)[:, key] if axis else getattr(X, indexer)[key] -def _list_indexing(X, key, axis): +def _list_indexing(X, key): """Index a Python list.""" - if axis == 0: - # Python list - if not isinstance(key, Iterable) or isinstance(indexable, slice): - # key being a slice or a scalar - return X[key] - key_set = set(key) - if len(key_set) == 2 and all(isinstance(k, (bool, np.bool_)) - for k in key_set): - # key is a boolean array-like - return list(compress(X, key)) - # key is a integer array-like of key - return [X[idx] for idx in key] + if not isinstance(key, Iterable) or isinstance(indexable, slice): + # key is a slice or a scalar + return X[key] + key_set = set(key) + if (len(key_set) == 2 and + all(isinstance(k, (bool, np.bool_)) for k in key_set)): + # key is a boolean array-like + return list(compress(X, key)) + # key is a integer array-like of key + return [X[idx] for idx in key] def _check_key_type(key, superclass): @@ -321,7 +319,7 @@ def safe_indexing(X, indices, axis=0): elif hasattr(X, "shape"): return _array_indexing(X, indices, axis=axis) else: - return _list_indexing(X, indices, axis=axis) + return _list_indexing(X, indices) def _get_column_indices(X, key): From 18fba6c6f7b51ce63bb5b435bf1828eac32ca158 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:02:25 -0400 Subject: [PATCH 33/86] make check_is_fitted not take attributes --- benchmarks/bench_plot_nmf.py | 4 +-- doc/developers/contributing.rst | 2 +- sklearn/calibration.py | 4 +-- sklearn/cluster/_feature_agglomeration.py | 4 +-- sklearn/cluster/affinity_propagation_.py | 2 +- sklearn/cluster/birch.py 
| 4 +-- sklearn/cluster/k_means_.py | 8 +++--- sklearn/cluster/mean_shift_.py | 2 +- sklearn/compose/_column_transformer.py | 4 +-- sklearn/compose/_target.py | 2 +- sklearn/covariance/elliptic_envelope.py | 4 +-- sklearn/cross_decomposition/pls_.py | 6 ++-- sklearn/decomposition/base.py | 2 +- sklearn/decomposition/dict_learning.py | 2 +- sklearn/decomposition/factor_analysis.py | 8 +++--- sklearn/decomposition/fastica_.py | 4 +-- sklearn/decomposition/kernel_pca.py | 2 +- sklearn/decomposition/nmf.py | 4 +-- sklearn/decomposition/online_lda.py | 4 +-- sklearn/decomposition/pca.py | 2 +- sklearn/decomposition/sparse_pca.py | 2 +- sklearn/discriminant_analysis.py | 6 ++-- sklearn/dummy.py | 6 ++-- .../_hist_gradient_boosting/binning.py | 2 +- sklearn/ensemble/bagging.py | 8 +++--- sklearn/ensemble/forest.py | 10 +++---- sklearn/ensemble/gradient_boosting.py | 4 +-- sklearn/ensemble/iforest.py | 4 +-- sklearn/ensemble/voting.py | 10 +++---- sklearn/ensemble/weight_boosting.py | 10 +++---- sklearn/feature_extraction/text.py | 6 ++-- sklearn/feature_selection/rfe.py | 12 ++++---- .../feature_selection/univariate_selection.py | 12 ++++---- .../feature_selection/variance_threshold.py | 2 +- sklearn/gaussian_process/gpc.py | 10 +++---- sklearn/impute/_base.py | 4 +-- sklearn/impute/_iterative.py | 2 +- sklearn/kernel_approximation.py | 6 ++-- sklearn/kernel_ridge.py | 2 +- sklearn/linear_model/base.py | 4 +-- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/logistic.py | 2 +- sklearn/linear_model/ransac.py | 4 +-- sklearn/linear_model/stochastic_gradient.py | 4 +-- sklearn/manifold/locally_linear.py | 2 +- sklearn/mixture/gaussian_mixture.py | 2 +- sklearn/model_selection/_search.py | 2 +- sklearn/multiclass.py | 20 ++++++------- sklearn/multioutput.py | 8 +++--- sklearn/naive_bayes.py | 8 +++--- sklearn/neighbors/base.py | 8 +++--- sklearn/neighbors/nca.py | 2 +- sklearn/neighbors/nearest_centroid.py | 2 +- .../neural_network/multilayer_perceptron.py | 6 ++-- sklearn/neural_network/rbm.py | 6 ++-- sklearn/preprocessing/_discretization.py | 4 +-- sklearn/preprocessing/_encoders.py | 8 +++--- sklearn/preprocessing/data.py | 28 +++++++++---------- sklearn/preprocessing/label.py | 12 ++++---- sklearn/random_projection.py | 2 +- sklearn/semi_supervised/label_propagation.py | 2 +- sklearn/svm/base.py | 4 +-- sklearn/tests/test_metaestimators.py | 2 +- sklearn/tree/tree.py | 12 ++++---- sklearn/utils/tests/test_estimator_checks.py | 2 +- sklearn/utils/tests/test_validation.py | 16 +++++------ sklearn/utils/validation.py | 8 +++--- 67 files changed, 187 insertions(+), 187 deletions(-) diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 3ec7cea92cf2d..d8d34d8f952ce 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -213,13 +213,13 @@ def fit(self, X, y=None, **params): return self def transform(self, X): - check_is_fitted(self, 'components_') + check_is_fitted(self) H = self.components_ W, _, self.n_iter_ = self._fit_transform(X, H=H, update_H=False) return W def inverse_transform(self, W): - check_is_fitted(self, 'components_') + check_is_fitted(self) return np.dot(W, self.components_) def fit_transform(self, X, y=None, W=None, H=None): diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 1ef8f2d03e14c..27d7236bf02d4 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -1354,7 +1354,7 @@ the correct interface more easily. ... def predict(self, X): ... ... 
# Check is fit had been called - ... check_is_fitted(self, ['X_', 'y_']) + ... check_is_fitted(self) ... ... # Input validation ... X = check_array(X) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 2c30cdabcb415..b88a8b8eb37ef 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -216,7 +216,7 @@ def predict_proba(self, X): C : array, shape (n_samples, n_classes) The predicted probas. """ - check_is_fitted(self, ["classes_", "calibrated_classifiers_"]) + check_is_fitted(self) X = check_array(X, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False) # Compute the arithmetic mean of the predictions of the calibrated @@ -244,7 +244,7 @@ def predict(self, X): C : array, shape (n_samples,) The predicted class. """ - check_is_fitted(self, ["classes_", "calibrated_classifiers_"]) + check_is_fitted(self) return self.classes_[np.argmax(self.predict_proba(X), axis=1)] diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index f20b8db7d535c..3b7767feedb00 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -36,7 +36,7 @@ def transform(self, X): Y : array, shape = [n_samples, n_clusters] or [n_clusters] The pooled values for each feature cluster. """ - check_is_fitted(self, "labels_") + check_is_fitted(self) X = check_array(X) if len(self.labels_) != X.shape[1]: @@ -71,7 +71,7 @@ def inverse_transform(self, Xred): A vector of size n_samples with the values of Xred assigned to each of the cluster of samples. """ - check_is_fitted(self, "labels_") + check_is_fitted(self) unil, inverse = np.unique(self.labels_, return_inverse=True) return Xred[..., inverse] diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 487ade4012133..89c6ce9fe8b34 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -407,7 +407,7 @@ def predict(self, X): labels : ndarray, shape (n_samples,) Cluster labels. """ - check_is_fitted(self, "cluster_centers_indices_") + check_is_fitted(self) if not hasattr(self, "cluster_centers_"): raise ValueError("Predict method is not supported when " "affinity='precomputed'.") diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 27b5038bb67a3..941b833e977f7 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -534,7 +534,7 @@ def partial_fit(self, X=None, y=None): return self._fit(X) def _check_fit(self, X): - check_is_fitted(self, ['subcluster_centers_', 'partial_fit_'], + check_is_fitted(self, all_or_any=any) if (hasattr(self, 'subcluster_centers_') and @@ -583,7 +583,7 @@ def transform(self, X): X_trans : {array-like, sparse matrix}, shape (n_samples, n_clusters) Transformed data. """ - check_is_fitted(self, 'subcluster_centers_') + check_is_fitted(self) return euclidean_distances(X, self.subcluster_centers_) def _global_clustering(self, X=None): diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index b7fbdf7da3ad1..27bdc934e4c0d 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -1033,7 +1033,7 @@ def transform(self, X): X_new : array, shape [n_samples, k] X transformed in the new space. """ - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) return self._transform(X) @@ -1063,7 +1063,7 @@ def predict(self, X, sample_weight=None): labels : array, shape [n_samples,] Index of the cluster each sample belongs to. 
""" - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) x_squared_norms = row_norms(X, squared=True) @@ -1090,7 +1090,7 @@ def score(self, X, y=None, sample_weight=None): score : float Opposite of the value of X on the K-means objective. """ - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) x_squared_norms = row_norms(X, squared=True) @@ -1733,7 +1733,7 @@ def predict(self, X, sample_weight=None): labels : array, shape [n_samples,] Index of the cluster each sample belongs to. """ - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) return self._labels_inertia_minibatch(X, sample_weight)[0] diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index 960ac28984721..e588ccd6df1c8 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -435,6 +435,6 @@ def predict(self, X): labels : array, shape [n_samples,] Index of the cluster each sample belongs to. """ - check_is_fitted(self, "cluster_centers_") + check_is_fitted(self) return pairwise_distances_argmin(X, self.cluster_centers_) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index c0f537776cb6a..1d460b11dc480 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -341,7 +341,7 @@ def get_feature_names(self): feature_names : list of strings Names of the features produced by transform. """ - check_is_fitted(self, 'transformers_') + check_is_fitted(self) feature_names = [] for name, trans, _, _ in self._iter(fitted=True): if trans == 'drop': @@ -516,7 +516,7 @@ def transform(self, X): sparse matrices. """ - check_is_fitted(self, 'transformers_') + check_is_fitted(self) X = _check_X(X) if self._n_features > X.shape[1]: diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index c1c3f4df4e95f..35b7ed6af962a 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -221,7 +221,7 @@ def predict(self, X): Predicted values. """ - check_is_fitted(self, "regressor_") + check_is_fitted(self) pred = self.regressor_.predict(X) if pred.ndim == 1: pred_trans = self.transformer_.inverse_transform( diff --git a/sklearn/covariance/elliptic_envelope.py b/sklearn/covariance/elliptic_envelope.py index 517f9a32dc9af..aa5e01ffa14b0 100644 --- a/sklearn/covariance/elliptic_envelope.py +++ b/sklearn/covariance/elliptic_envelope.py @@ -147,7 +147,7 @@ def decision_function(self, X): compatibility with other outlier detection algorithms. """ - check_is_fitted(self, 'offset_') + check_is_fitted(self) negative_mahal_dist = self.score_samples(X) return negative_mahal_dist - self.offset_ @@ -163,7 +163,7 @@ def score_samples(self, X): negative_mahal_distances : array-like, shape (n_samples, ) Opposite of the Mahalanobis distances. """ - check_is_fitted(self, 'offset_') + check_is_fitted(self) return -self.mahalanobis(X) def predict(self, X): diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index 175a472e6d4fb..94c517992e061 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -398,7 +398,7 @@ def transform(self, X, Y=None, copy=True): ------- x_scores if Y is not given, (x_scores, y_scores) otherwise. 
""" - check_is_fitted(self, 'x_mean_') + check_is_fitted(self) X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) # Normalize X -= self.x_mean_ @@ -433,7 +433,7 @@ def predict(self, X, copy=True): This call requires the estimation of a p x q matrix, which may be an issue in high dimensional space. """ - check_is_fitted(self, 'x_mean_') + check_is_fitted(self) X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) # Normalize X -= self.x_mean_ @@ -872,7 +872,7 @@ def transform(self, X, Y=None): Target vectors, where n_samples is the number of samples and n_targets is the number of response variables. """ - check_is_fitted(self, 'x_mean_') + check_is_fitted(self) X = check_array(X, dtype=np.float64) Xr = (X - self.x_mean_) / self.x_std_ x_scores = np.dot(Xr, self.x_weights_) diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index 3cbdb29723825..0dad8c6130d68 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -122,7 +122,7 @@ def transform(self, X): IncrementalPCA(batch_size=3, n_components=2) >>> ipca.transform(X) # doctest: +SKIP """ - check_is_fitted(self, ['mean_', 'components_'], all_or_any=all) + check_is_fitted(self, all_or_any=all) X = check_array(X) if self.mean_ is not None: diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 56187948f8554..6fa6d1e7f2d6f 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -911,7 +911,7 @@ def transform(self, X): Transformed data """ - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X) diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index f9d81737850ff..1306c4245a7f3 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -261,7 +261,7 @@ def transform(self, X): X_new : array-like, shape (n_samples, n_components) The latent variables of X. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X) Ih = np.eye(len(self.components_)) @@ -285,7 +285,7 @@ def get_covariance(self): cov : array, shape (n_features, n_features) Estimated covariance of data. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) cov = np.dot(self.components_.T, self.components_) cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace @@ -299,7 +299,7 @@ def get_precision(self): precision : array, shape (n_features, n_features) Estimated precision of data. 
""" - check_is_fitted(self, 'components_') + check_is_fitted(self) n_features = self.components_.shape[1] @@ -333,7 +333,7 @@ def score_samples(self, X): ll : array, shape (n_samples,) Log-likelihood of each sample under the current model """ - check_is_fitted(self, 'components_') + check_is_fitted(self) Xr = X - self.mean_ precision = self.get_precision() diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index d841926cdfc87..3f6f1af632494 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -574,7 +574,7 @@ def transform(self, X, copy=True): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'mixing_') + check_is_fitted(self) X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) if self.whiten: @@ -597,7 +597,7 @@ def inverse_transform(self, X, copy=True): ------- X_new : array-like, shape (n_samples, n_features) """ - check_is_fitted(self, 'mixing_') + check_is_fitted(self) X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES) X = np.dot(X, self.mixing_.T) diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py index 555bd619c5a62..59785fed3ac0e 100644 --- a/sklearn/decomposition/kernel_pca.py +++ b/sklearn/decomposition/kernel_pca.py @@ -319,7 +319,7 @@ def transform(self, X): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'X_fit_') + check_is_fitted(self) # Compute centered gram matrix between X and training data X_fit_ K = self._centerer.transform(self._get_kernel(X, self.X_fit_)) diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py index f64bc34b7fad7..0233688ae696e 100644 --- a/sklearn/decomposition/nmf.py +++ b/sklearn/decomposition/nmf.py @@ -1313,7 +1313,7 @@ def transform(self, X): W : array, shape (n_samples, n_components) Transformed data """ - check_is_fitted(self, 'n_components_') + check_is_fitted(self) W, _, n_iter_ = non_negative_factorization( X=X, W=None, H=self.components_, n_components=self.n_components_, @@ -1340,5 +1340,5 @@ def inverse_transform(self, W): .. versionadded:: 0.18 """ - check_is_fitted(self, 'n_components_') + check_is_fitted(self) return np.dot(W, self.components_) diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index c1d482f0a46c6..503cc24692e25 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -594,7 +594,7 @@ def _unnormalized_transform(self, X): doc_topic_distr : shape=(n_samples, n_components) Document topic distribution for X. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) # make sure feature size is the same in fitted model and in X X = self._check_non_neg_array(X, "LatentDirichletAllocation.transform") @@ -748,7 +748,7 @@ def _perplexity_precomp_distr(self, X, doc_topic_distr=None, score : float Perplexity score. 
""" - check_is_fitted(self, 'components_') + check_is_fitted(self) X = self._check_non_neg_array(X, "LatentDirichletAllocation.perplexity") diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index 99e392020abaf..1bf3d6e6b19e6 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -569,7 +569,7 @@ def score_samples(self, X): ll : array, shape (n_samples,) Log-likelihood of each sample under the current model """ - check_is_fitted(self, 'mean_') + check_is_fitted(self) X = check_array(X) Xr = X - self.mean_ diff --git a/sklearn/decomposition/sparse_pca.py b/sklearn/decomposition/sparse_pca.py index 238f6cc4ef403..3ca14cb528bb8 100644 --- a/sklearn/decomposition/sparse_pca.py +++ b/sklearn/decomposition/sparse_pca.py @@ -221,7 +221,7 @@ def transform(self, X): X_new array, shape (n_samples, n_components) Transformed data. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X) X = X - self.mean_ diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 9634b303ea946..4a3542e204288 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -505,7 +505,7 @@ def transform(self, X): if self.solver == 'lsqr': raise NotImplementedError("transform not implemented for 'lsqr' " "solver (use 'svd' or 'eigen').") - check_is_fitted(self, ['xbar_', 'scalings_'], all_or_any=any) + check_is_fitted(self, all_or_any=any) X = check_array(X) if self.solver == 'svd': @@ -528,7 +528,7 @@ def predict_proba(self, X): C : array, shape (n_samples, n_classes) Estimated probabilities. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) decision = self.decision_function(X) if self.classes_.size == 2: @@ -704,7 +704,7 @@ def fit(self, X, y): return self def _decision_function(self, X): - check_is_fitted(self, 'classes_') + check_is_fitted(self) X = check_array(X) norm2 = [] diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 02d8a448c9766..067a956f6435d 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -171,7 +171,7 @@ def predict(self, X): y : array, shape = [n_samples] or [n_samples, n_outputs] Predicted target values for X. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) # numpy random_state expects Python int and not long as size argument # under Windows @@ -249,7 +249,7 @@ def predict_proba(self, X): the model, where classes are ordered arithmetically, for each output. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) # numpy random_state expects Python int and not long as size argument # under Windows @@ -498,7 +498,7 @@ def predict(self, X, return_std=False): y_std : array, shape = [n_samples] or [n_samples, n_outputs] Standard deviation of predictive distribution of query points. """ - check_is_fitted(self, "constant_") + check_is_fitted(self) n_samples = _num_samples(X) y = np.full((n_samples, self.n_outputs_), self.constant_, diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index a7ddc9a3ebb47..b35b2a2083b03 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -143,7 +143,7 @@ def transform(self, X): The binned data (fortran-aligned). 
""" X = check_array(X, dtype=[X_DTYPE], force_all_finite=False) - check_is_fitted(self, ['bin_thresholds_', 'actual_n_bins_']) + check_is_fitted(self) if X.shape[1] != self.actual_n_bins_.shape[0]: raise ValueError( 'This estimator was fitted with {} features but {} got passed ' diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index 15096afefa810..7829b99b050f1 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -672,7 +672,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) # Check data X = check_array( X, accept_sparse=['csr', 'csc'], dtype=None, @@ -722,7 +722,7 @@ def predict_log_proba(self, X): The class log-probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) if hasattr(self.base_estimator_, "predict_log_proba"): # Check data X = check_array( @@ -780,7 +780,7 @@ def decision_function(self, X): cases with ``k == 1``, otherwise ``k==n_classes``. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) # Check data X = check_array( @@ -965,7 +965,7 @@ def predict(self, X): y : array of shape = [n_samples] The predicted values. """ - check_is_fitted(self, "estimators_features_") + check_is_fitted(self) # Check data X = check_array( X, accept_sparse=['csr', 'csc'], dtype=None, diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index a3513fdf32e41..b0fff1f6c9181 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -346,7 +346,7 @@ def _validate_y_class_weight(self, y): def _validate_X_predict(self, X): """Validate X whenever one tries to predict, apply, predict_proba""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) return self.estimators_[0]._validate_X_predict(X, check_input=True) @@ -362,7 +362,7 @@ def feature_importances_(self): trees consisting of only the root node, in which case it will be an array of zeros. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) all_importances = Parallel(n_jobs=self.n_jobs, **_joblib_parallel_args(prefer='threads'))( @@ -575,7 +575,7 @@ class in a leaf. The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) # Check data X = self._validate_X_predict(X) @@ -680,7 +680,7 @@ def predict(self, X): y : array of shape = [n_samples] or [n_samples, n_outputs] The predicted values. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) # Check data X = self._validate_X_predict(X) @@ -2026,5 +2026,5 @@ def transform(self, X): X_transformed : sparse matrix, shape=(n_samples, n_out) Transformed dataset. 
""" - check_is_fitted(self, 'one_hot_encoder_') + check_is_fitted(self) return self.one_hot_encoder_.transform(self.apply(X)) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index cc43df36ba608..11813855d01d8 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1390,7 +1390,7 @@ def _is_initialized(self): def _check_initialized(self): """Check that the estimator is initialized, raising an error if not.""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) def fit(self, X, y, sample_weight=None, monitor=None): """Fit the gradient boosting model. @@ -1741,7 +1741,7 @@ def _compute_partial_dependence_recursion(self, grid, target_features): (n_trees_per_iteration, n_samples) The value of the partial dependence function on each grid point. """ - check_is_fitted(self, 'estimators_', + check_is_fitted(self, msg="'estimator' parameter must be a fitted estimator") if self.init is not None: warnings.warn( diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index 8aaae2925ccaf..4cdeb9673ccdb 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -303,7 +303,7 @@ def predict(self, X): For each observation, tells whether or not (+1 or -1) it should be considered as an inlier according to the fitted model. """ - check_is_fitted(self, ["offset_"]) + check_is_fitted(self) X = check_array(X, accept_sparse='csr') is_inlier = np.ones(X.shape[0], dtype=int) is_inlier[self.decision_function(X) < 0] = -1 @@ -365,7 +365,7 @@ def score_samples(self, X): The lower, the more abnormal. """ # code structure from ForestClassifier/predict_proba - check_is_fitted(self, ["estimators_"]) + check_is_fitted(self) # Check data X = check_array(X, accept_sparse='csr') diff --git a/sklearn/ensemble/voting.py b/sklearn/ensemble/voting.py index 7900d28c1f782..69381a39d9ce3 100644 --- a/sklearn/ensemble/voting.py +++ b/sklearn/ensemble/voting.py @@ -296,7 +296,7 @@ def predict(self, X): Predicted class labels. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if self.voting == 'soft': maj = np.argmax(self.predict_proba(X), axis=1) @@ -317,7 +317,7 @@ def _collect_probas(self, X): def _predict_proba(self, X): """Predict class probabilities for X in 'soft' voting """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) avg = np.average(self._collect_probas(X), axis=0, weights=self._weights_not_none) return avg @@ -363,7 +363,7 @@ def transform(self, X): array-like of shape (n_samples, n_classifiers), being class labels predicted by each classifier. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if self.voting == 'soft': probas = self._collect_probas(X) @@ -477,7 +477,7 @@ def predict(self, X): y : array of shape (n_samples,) The predicted values. """ - check_is_fitted(self, "estimators_") + check_is_fitted(self) return np.average(self._predict(X), axis=1, weights=self._weights_not_none) @@ -495,5 +495,5 @@ def transform(self, X): array-like of shape (n_samples, n_classifiers), being values predicted by each regressor. 
""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) return self._predict(X) diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index 2d0ba849f9990..9d3f1611a9d70 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -674,7 +674,7 @@ def decision_function(self, X): values closer to -1 or 1 mean more like the first or second class in ``classes_``, respectively. """ - check_is_fitted(self, "n_classes_") + check_is_fitted(self) X = self._validate_data(X) n_classes = self.n_classes_ @@ -717,7 +717,7 @@ def staged_decision_function(self, X): values closer to -1 or 1 mean more like the first or second class in ``classes_``, respectively. """ - check_is_fitted(self, "n_classes_") + check_is_fitted(self) X = self._validate_data(X) n_classes = self.n_classes_ @@ -786,7 +786,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the `classes_` attribute. """ - check_is_fitted(self, "n_classes_") + check_is_fitted(self) X = self._validate_data(X) n_classes = self.n_classes_ @@ -1109,7 +1109,7 @@ def predict(self, X): y : array of shape = [n_samples] The predicted regression values. """ - check_is_fitted(self, "estimator_weights_") + check_is_fitted(self) X = self._validate_data(X) return self._get_median_predict(X, len(self.estimators_)) @@ -1134,7 +1134,7 @@ def staged_predict(self, X): y : generator of array, shape = [n_samples] The predicted regression values. """ - check_is_fitted(self, "estimator_weights_") + check_is_fitted(self) X = self._validate_data(X) for i, _ in enumerate(self.estimators_, 1): diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index ed4d41cc464f8..01a7b70587f3d 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -452,7 +452,7 @@ def _validate_vocabulary(self): def _check_vocabulary(self): """Check if vocabulary is empty or missing (not fit-ed)""" msg = "%(name)s - Vocabulary wasn't fitted." - check_is_fitted(self, 'vocabulary_', msg=msg), + check_is_fitted(self, msg=msg), if len(self.vocabulary_) == 0: raise ValueError("Vocabulary is empty") @@ -1380,7 +1380,7 @@ def transform(self, X, copy=True): X.data += 1 if self.use_idf: - check_is_fitted(self, '_idf_diag', 'idf vector is not fitted') + check_is_fitted(self, 'idf vector is not fitted') expected_n_features = self._idf_diag.shape[0] if n_features != expected_n_features: @@ -1749,7 +1749,7 @@ def transform(self, raw_documents, copy=True): X : sparse matrix, [n_samples, n_features] Tf-idf-weighted document-term matrix. """ - check_is_fitted(self, '_tfidf', 'The tfidf vector is not fitted') + check_is_fitted(self, 'The tfidf vector is not fitted') X = super().transform(raw_documents) return self._tfidf.transform(X, copy=False) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index ce4eb5ed8bd45..4e957e8463a7c 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -242,7 +242,7 @@ def predict(self, X): y : array of shape [n_samples] The predicted target values. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict(self.transform(X)) @if_delegate_has_method(delegate='estimator') @@ -258,11 +258,11 @@ def score(self, X, y): y : array of shape [n_samples] The target values. 
""" - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.score(self.transform(X), y) def _get_support_mask(self): - check_is_fitted(self, 'support_') + check_is_fitted(self) return self.support_ @if_delegate_has_method(delegate='estimator') @@ -284,7 +284,7 @@ def decision_function(self, X): Regression and binary classification produce an array of shape [n_samples]. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.decision_function(self.transform(X)) @if_delegate_has_method(delegate='estimator') @@ -304,7 +304,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict_proba(self.transform(X)) @if_delegate_has_method(delegate='estimator') @@ -322,7 +322,7 @@ def predict_log_proba(self, X): The class log-probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict_log_proba(self.transform(X)) def _more_tags(self): diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index 554cb3d392b29..5b1cae1823e9c 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -429,7 +429,7 @@ def _check_params(self, X, y): % self.percentile) def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) # Cater for NaNs if self.percentile == 100: @@ -514,7 +514,7 @@ def _check_params(self, X, y): % (X.shape[1], self.k)) def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) if self.k == 'all': return np.ones(self.scores_.shape, dtype=bool) @@ -587,7 +587,7 @@ def __init__(self, score_func=f_classif, alpha=5e-2): self.alpha = alpha def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) return self.pvalues_ < self.alpha @@ -653,7 +653,7 @@ def __init__(self, score_func=f_classif, alpha=5e-2): self.alpha = alpha def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) n_features = len(self.pvalues_) sv = np.sort(self.pvalues_) @@ -716,7 +716,7 @@ def __init__(self, score_func=f_classif, alpha=5e-2): self.alpha = alpha def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) return (self.pvalues_ < self.alpha / len(self.pvalues_)) @@ -811,7 +811,7 @@ def _check_params(self, X, y): self._make_selector()._check_params(X, y) def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) selector = self._make_selector() selector.pvalues_ = self.pvalues_ diff --git a/sklearn/feature_selection/variance_threshold.py b/sklearn/feature_selection/variance_threshold.py index 7d98de82c9711..c9eb973dc86c3 100644 --- a/sklearn/feature_selection/variance_threshold.py +++ b/sklearn/feature_selection/variance_threshold.py @@ -87,6 +87,6 @@ def fit(self, X, y=None): return self def _get_support_mask(self): - check_is_fitted(self, 'variances_') + check_is_fitted(self) return self.variances_ > self.threshold diff --git a/sklearn/gaussian_process/gpc.py b/sklearn/gaussian_process/gpc.py index 6270409f129b1..5421f7e408472 100644 --- a/sklearn/gaussian_process/gpc.py +++ b/sklearn/gaussian_process/gpc.py @@ -255,7 +255,7 @@ def 
predict(self, X): C : array, shape = (n_samples,) Predicted target values for X, values are from ``classes_`` """ - check_is_fitted(self, ["X_train_", "y_train_", "pi_", "W_sr_", "L_"]) + check_is_fitted(self) # As discussed on Section 3.4.2 of GPML, for making hard binary # decisions, it is enough to compute the MAP of the posterior and @@ -279,7 +279,7 @@ def predict_proba(self, X): the model. The columns correspond to the classes in sorted order, as they appear in the attribute ``classes_``. """ - check_is_fitted(self, ["X_train_", "y_train_", "pi_", "W_sr_", "L_"]) + check_is_fitted(self) # Based on Algorithm 3.2 of GPML K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star) @@ -663,7 +663,7 @@ def predict(self, X): C : array, shape = (n_samples,) Predicted target values for X, values are from ``classes_`` """ - check_is_fitted(self, ["classes_", "n_classes_"]) + check_is_fitted(self) X = check_array(X) return self.base_estimator_.predict(X) @@ -681,7 +681,7 @@ def predict_proba(self, X): the model. The columns correspond to the classes in sorted order, as they appear in the attribute `classes_`. """ - check_is_fitted(self, ["classes_", "n_classes_"]) + check_is_fitted(self) if self.n_classes_ > 2 and self.multi_class == "one_vs_one": raise ValueError("one_vs_one multi-class mode does not support " "predicting probability estimates. Use " @@ -735,7 +735,7 @@ def log_marginal_likelihood(self, theta=None, eval_gradient=False, hyperparameters at position theta. Only returned when eval_gradient is True. """ - check_is_fitted(self, ["classes_", "n_classes_"]) + check_is_fitted(self) if theta is None: if eval_gradient: diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 88516f70f2e66..e56802bc74326 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -373,7 +373,7 @@ def transform(self, X): X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data to complete. """ - check_is_fitted(self, 'statistics_') + check_is_fitted(self) X = self._validate_input(X) @@ -653,7 +653,7 @@ def transform(self, X): will be boolean. """ - check_is_fitted(self, "features_") + check_is_fitted(self) X = self._validate_input(X) if X.shape[1] != self._n_features: diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index ecf94d5ccfb57..05e2f1484fccf 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -627,7 +627,7 @@ def transform(self, X): Xt : array-like, shape (n_samples, n_features) The imputed input data. """ - check_is_fitted(self, 'initial_imputer_') + check_is_fitted(self) if self.add_indicator: X_trans_indicator = self.indicator_.transform(X) diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 1ef79f48a0459..9d257427944dc 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -115,7 +115,7 @@ def transform(self, X): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'random_weights_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') projection = safe_sparse_dot(X, self.random_weights_) @@ -222,7 +222,7 @@ def transform(self, X): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'random_weights_') + check_is_fitted(self) X = as_float_array(X, copy=True) X = check_array(X, copy=False) @@ -580,7 +580,7 @@ def transform(self, X): X_transformed : array, shape=(n_samples, n_components) Transformed data. 
""" - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') kernel_params = self._get_kernel_params() diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py index 392ae265a8f20..3d69066e342d6 100644 --- a/sklearn/kernel_ridge.py +++ b/sklearn/kernel_ridge.py @@ -188,6 +188,6 @@ def predict(self, X): C : array, shape = [n_samples] or [n_samples, n_targets] Returns predicted values. """ - check_is_fitted(self, ["X_fit_", "dual_coef_"]) + check_is_fitted(self) K = self._get_kernel(X, self.X_fit_) return np.dot(K, self.dual_coef_) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 51ff3a2d1588a..b408c8569529d 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -199,7 +199,7 @@ def fit(self, X, y): """Fit model.""" def _decision_function(self, X): - check_is_fitted(self, "coef_") + check_is_fitted(self) X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) return safe_sparse_dot(X, self.coef_.T, @@ -258,7 +258,7 @@ def decision_function(self, X): case, confidence score for self.classes_[1] where >0 means this class would be predicted. """ - check_is_fitted(self, 'coef_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 646839a0a3ae6..1aebbfa5ba54e 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -785,7 +785,7 @@ def _decision_function(self, X): T : array, shape (n_samples,) The predicted decision function """ - check_is_fitted(self, 'n_iter_') + check_is_fitted(self) if sparse.isspmatrix(X): return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_ diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 1ad01e5ddc656..432a5a7db3c0d 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1628,7 +1628,7 @@ def predict_proba(self, X): Returns the probability of the sample for each class in the model, where classes are ordered as they are in ``self.classes_``. """ - check_is_fitted(self, 'coef_') + check_is_fitted(self) ovr = (self.multi_class in ["ovr", "warn"] or (self.multi_class == 'auto' and (self.classes_.size <= 2 or diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index b901e848f49bf..e868a31d17c8d 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -466,7 +466,7 @@ def predict(self, X): y : array, shape = [n_samples] or [n_samples, n_targets] Returns predicted values. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict(X) @@ -488,6 +488,6 @@ def score(self, X, y): z : float Score of the prediction. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.score(X, y) diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index afad2e94ed8c1..e80a6a7ec3ce4 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -982,7 +982,7 @@ def predict_proba(self): return self._predict_proba def _predict_proba(self, X): - check_is_fitted(self, "t_") + check_is_fitted(self) if self.loss == "log": return self._predict_proba_lr(X) @@ -1216,7 +1216,7 @@ def _decision_function(self, X): array, shape (n_samples,) Predicted target values per element in X. 
""" - check_is_fitted(self, ["t_", "coef_", "intercept_"], all_or_any=all) + check_is_fitted(self, all_or_any=all) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/locally_linear.py index cf3c58486c27a..4e90d4876f4df 100644 --- a/sklearn/manifold/locally_linear.py +++ b/sklearn/manifold/locally_linear.py @@ -717,7 +717,7 @@ def transform(self, X): Because of scaling performed by this method, it is discouraged to use it together with methods that are not scale-invariant (like SVMs) """ - check_is_fitted(self, "nbrs_") + check_is_fitted(self) X = check_array(X) ind = self.nbrs_.kneighbors(X, n_neighbors=self.n_neighbors, diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py index 120b72f06cd01..610af54cc343a 100644 --- a/sklearn/mixture/gaussian_mixture.py +++ b/sklearn/mixture/gaussian_mixture.py @@ -687,7 +687,7 @@ def _compute_lower_bound(self, _, log_prob_norm): return log_prob_norm def _check_is_fitted(self): - check_is_fitted(self, ['weights_', 'means_', 'precisions_cholesky_']) + check_is_fitted(self) def _get_parameters(self): return (self.weights_, self.means_, self.covariances_, diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 208ab536c8181..74284f3bdb2fd 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -437,7 +437,7 @@ def _check_is_fitted(self, method_name): 'attribute' % (type(self).__name__, method_name)) else: - check_is_fitted(self, 'best_estimator_') + check_is_fitted(self) @if_delegate_has_method(delegate=('best_estimator_', 'estimator')) def predict(self, X): diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 6315197ad7856..5a8dcebd4170b 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -113,17 +113,17 @@ def fit(self, X, y): return self def predict(self, X): - check_is_fitted(self, 'y_') + check_is_fitted(self) return np.repeat(self.y_, X.shape[0]) def decision_function(self, X): - check_is_fitted(self, 'y_') + check_is_fitted(self) return np.repeat(self.y_, X.shape[0]) def predict_proba(self, X): - check_is_fitted(self, 'y_') + check_is_fitted(self) return np.repeat([np.hstack([1 - self.y_, self.y_])], X.shape[0], axis=0) @@ -285,7 +285,7 @@ def predict(self, X): y : (sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]. Predicted multi-class targets. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) n_samples = _num_samples(X) if self.label_binarizer_.y_type_ == "multiclass": @@ -337,7 +337,7 @@ def predict_proba(self, X): Returns the probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) # Y[i, j] gives the probability that sample i has the label j. # In the multi-label case, these are not disjoint. 
Y = np.array([e.predict_proba(X)[:, 1] for e in self.estimators_]).T @@ -366,7 +366,7 @@ def decision_function(self, X): ------- T : array-like, shape = [n_samples, n_classes] """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if len(self.estimators_) == 1: return self.estimators_[0].decision_function(X) return np.array([est.decision_function(X).ravel() @@ -383,7 +383,7 @@ def n_classes_(self): @property def coef_(self): - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not hasattr(self.estimators_[0], "coef_"): raise AttributeError( "Base estimator doesn't have a coef_ attribute.") @@ -394,7 +394,7 @@ def coef_(self): @property def intercept_(self): - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not hasattr(self.estimators_[0], "intercept_"): raise AttributeError( "Base estimator doesn't have an intercept_ attribute.") @@ -603,7 +603,7 @@ def decision_function(self, X): ------- Y : array-like, shape = [n_samples, n_classes] """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) indices = self.pairwise_indices_ if indices is None: @@ -768,7 +768,7 @@ def predict(self, X): y : numpy array of shape [n_samples] Predicted multi-class targets. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) X = check_array(X) Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T pred = euclidean_distances(Y, self.code_book_).argmin(axis=1) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 4411919c1821f..463b72d40f47a 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -185,7 +185,7 @@ def predict(self, X): Multi-output targets predicted across multiple predictors. Note: Separate models are generated for each predictor. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not hasattr(self.estimator, "predict"): raise ValueError("The base estimator should implement" " a predict method") @@ -344,7 +344,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not all([hasattr(estimator, "predict_proba") for estimator in self.estimators_]): raise ValueError("The base estimator should implement " @@ -370,7 +370,7 @@ def score(self, X, y): scores : float accuracy_score of self.predict(X) versus y """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) n_outputs_ = len(self.estimators_) if y.ndim == 1: raise ValueError("y must have at least two dimensions for " @@ -472,7 +472,7 @@ def predict(self, X): The predicted values. 
""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) X = check_array(X, accept_sparse=True) Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) for chain_idx, estimator in enumerate(self.estimators_): diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index b3518c8f22e0c..aa14a7f085828 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -431,7 +431,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, return self def _joint_log_likelihood(self, X): - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X) joint_log_likelihood = [] @@ -743,7 +743,7 @@ def _update_feature_log_prob(self, alpha): def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X, accept_sparse='csr') return (safe_sparse_dot(X, self.feature_log_prob_.T) + @@ -852,7 +852,7 @@ def _update_feature_log_prob(self, alpha): def _joint_log_likelihood(self, X): """Calculate the class scores for the samples in X.""" - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X, accept_sparse="csr") jll = safe_sparse_dot(X, self.feature_log_prob_.T) @@ -963,7 +963,7 @@ def _update_feature_log_prob(self, alpha): def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 9a05eb62c2afc..041c13aae5417 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -388,7 +388,7 @@ class from an array representing our data set and ask who's [2]]...) """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -543,7 +543,7 @@ def kneighbors_graph(self, X=None, n_neighbors=None, -------- NearestNeighbors.radius_neighbors_graph """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -691,7 +691,7 @@ class from an array representing our data set and ask who's For efficiency, `radius_neighbors` returns arrays of objects, where each object is a 1D array of indices or distances. """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if X is not None: query_is_train = False @@ -828,7 +828,7 @@ def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity'): -------- kneighbors_graph """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if X is not None: X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index 5060270ce1e61..68a72c92da865 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -258,7 +258,7 @@ def transform(self, X): If :meth:`fit` has not been called before. 
""" - check_is_fitted(self, ['components_']) + check_is_fitted(self) X = check_array(X) return np.dot(X, self.components_.T) diff --git a/sklearn/neighbors/nearest_centroid.py b/sklearn/neighbors/nearest_centroid.py index 5626189222e5a..3e1577469c920 100644 --- a/sklearn/neighbors/nearest_centroid.py +++ b/sklearn/neighbors/nearest_centroid.py @@ -191,7 +191,7 @@ def predict(self, X): be the distance matrix between the data to be predicted and ``self.centroids_``. """ - check_is_fitted(self, 'centroids_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') return self.classes_[pairwise_distances( diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index ebe5f03801ed5..11e1c4a3ab793 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -970,7 +970,7 @@ def predict(self, X): y : array-like, shape (n_samples,) or (n_samples, n_classes) The predicted classes. """ - check_is_fitted(self, "coefs_") + check_is_fitted(self) y_pred = self._predict(X) if self.n_outputs_ == 1: @@ -1071,7 +1071,7 @@ def predict_proba(self, X): The predicted probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. """ - check_is_fitted(self, "coefs_") + check_is_fitted(self) y_pred = self._predict(X) if self.n_outputs_ == 1: @@ -1332,7 +1332,7 @@ def predict(self, X): y : array-like, shape (n_samples, n_outputs) The predicted values. """ - check_is_fitted(self, "coefs_") + check_is_fitted(self) y_pred = self._predict(X) if y_pred.shape[1] == 1: return y_pred.ravel() diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py index b2b6166d4d253..fa39f5f23d80c 100644 --- a/sklearn/neural_network/rbm.py +++ b/sklearn/neural_network/rbm.py @@ -116,7 +116,7 @@ def transform(self, X): h : array, shape (n_samples, n_components) Latent representations of the data. """ - check_is_fitted(self, "components_") + check_is_fitted(self) X = check_array(X, accept_sparse='csr', dtype=np.float64) return self._mean_hiddens(X) @@ -208,7 +208,7 @@ def gibbs(self, v): v_new : array-like, shape (n_samples, n_features) Values of the visible layer after one Gibbs step. """ - check_is_fitted(self, "components_") + check_is_fitted(self) if not hasattr(self, "random_state_"): self.random_state_ = check_random_state(self.random_state) h_ = self._sample_hiddens(v, self.random_state_) @@ -299,7 +299,7 @@ def score_samples(self, X): free energy on X, then on a randomly corrupted version of X, and returns the log of the logistic function of the difference. """ - check_is_fitted(self, "components_") + check_is_fitted(self) v = check_array(X, accept_sparse='csr') rng = check_random_state(self.random_state) diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index b7ffd96032d2a..1be7499f783ec 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -253,7 +253,7 @@ def transform(self, X): Xt : numeric array-like or sparse matrix Data in the binned space. """ - check_is_fitted(self, ["bin_edges_"]) + check_is_fitted(self) Xt = check_array(X, copy=True, dtype=FLOAT_DTYPES) n_features = self.n_bins_.shape[0] @@ -294,7 +294,7 @@ def inverse_transform(self, Xt): Xinv : numeric array-like Data in the original feature space. 
""" - check_is_fitted(self, ["bin_edges_"]) + check_is_fitted(self) if 'onehot' in self.encode: Xt = self._encoder.inverse_transform(Xt) diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index c1d3b1e80c352..6a16b484ad563 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -371,7 +371,7 @@ def transform(self, X): X_out : sparse matrix if sparse=True else a 2-d array Transformed input. """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) # validation of X happens in _check_X called by _transform X_int, X_mask = self._transform(X, handle_unknown=self.handle_unknown) @@ -423,7 +423,7 @@ def inverse_transform(self, X): Inverse transformed array. """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') n_samples, _ = X.shape @@ -506,7 +506,7 @@ def get_feature_names(self, input_features=None): output_feature_names : array of string, length n_output_features """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) cats = self.categories_ if input_features is None: input_features = ['x%d' % i for i in range(len(cats))] @@ -639,7 +639,7 @@ def inverse_transform(self, X): Inverse transformed array. """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') n_samples, _ = X.shape diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 46530de8e6ad9..e70c98e48e898 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -387,7 +387,7 @@ def transform(self, X): X : array-like, shape [n_samples, n_features] Input data that will be transformed. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite="allow-nan") @@ -404,7 +404,7 @@ def inverse_transform(self, X): X : array-like, shape [n_samples, n_features] Input data that will be transformed. It cannot be sparse. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite="allow-nan") @@ -756,7 +756,7 @@ def transform(self, X, copy=None): copy : bool, optional (default: None) Copy the input X or not. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) copy = copy if copy is not None else self.copy X = check_array(X, accept_sparse='csr', copy=copy, @@ -792,7 +792,7 @@ def inverse_transform(self, X, copy=None): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) copy = copy if copy is not None else self.copy if sparse.issparse(X): @@ -957,7 +957,7 @@ def transform(self, X): X : {array-like, sparse matrix} The data that should be scaled. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -976,7 +976,7 @@ def inverse_transform(self, X): X : {array-like, sparse matrix} The data that should be transformed back. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1206,7 +1206,7 @@ def transform(self, X): X : {array-like, sparse matrix} The data used to scale along the specified axis. 
""" - check_is_fitted(self, 'center_', 'scale_') + check_is_fitted(self, 'scale_') X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1229,7 +1229,7 @@ def inverse_transform(self, X): X : array-like The data used to scale along the specified axis. """ - check_is_fitted(self, 'center_', 'scale_') + check_is_fitted(self, 'scale_') X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1415,7 +1415,7 @@ def _combinations(n_features, degree, interaction_only, include_bias): @property def powers_(self): - check_is_fitted(self, 'n_input_features_') + check_is_fitted(self) combinations = self._combinations(self.n_input_features_, self.degree, self.interaction_only, @@ -1502,7 +1502,7 @@ def transform(self, X): The matrix of features, where NP is the number of polynomial features generated from the combination of inputs. """ - check_is_fitted(self, ['n_input_features_', 'n_output_features_']) + check_is_fitted(self) X = check_array(X, order='F', dtype=FLOAT_DTYPES, accept_sparse=('csr', 'csc')) @@ -2014,7 +2014,7 @@ def transform(self, K, copy=True): ------- K_new : numpy array of shape [n_samples1, n_samples2] """ - check_is_fitted(self, 'K_fit_all_') + check_is_fitted(self) K = check_array(K, copy=copy, dtype=FLOAT_DTYPES) @@ -2411,7 +2411,7 @@ def _check_inputs(self, X, accept_sparse_negative=False, copy=False): def _check_is_fitted(self, X): """Check the inputs before transforming""" - check_is_fitted(self, 'quantiles_') + check_is_fitted(self) # check that the dimension of X are adequate with the fitted data if X.shape[1] != self.quantiles_.shape[1]: raise ValueError('X does not have the same number of features as' @@ -2786,7 +2786,7 @@ def transform(self, X): X_trans : array-like, shape (n_samples, n_features) The transformed data. """ - check_is_fitted(self, 'lambdas_') + check_is_fitted(self) X = self._check_input(X, check_positive=True, check_shape=True) transform_function = {'box-cox': boxcox, @@ -2832,7 +2832,7 @@ def inverse_transform(self, X): X : array-like, shape (n_samples, n_features) The original data """ - check_is_fitted(self, 'lambdas_') + check_is_fitted(self) X = self._check_input(X, check_shape=True) if self.standardize: diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index a236589d1698d..118fc22fa7f11 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -256,7 +256,7 @@ def transform(self, y): ------- y : array-like of shape [n_samples] """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) y = column_or_1d(y, warn=True) # transform of empty array is empty array if _num_samples(y) == 0: @@ -277,7 +277,7 @@ def inverse_transform(self, y): ------- y : numpy array of shape [n_samples] """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) y = column_or_1d(y, warn=True) # inverse transform of empty array is empty array if _num_samples(y) == 0: @@ -465,7 +465,7 @@ def transform(self, y): Y : numpy array or CSR matrix of shape [n_samples, n_classes] Shape will be [n_samples, 1] for binary problems. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) y_is_multilabel = type_of_target(y).startswith('multilabel') if y_is_multilabel and not self.y_type_.startswith('multilabel'): @@ -508,7 +508,7 @@ def inverse_transform(self, Y, threshold=None): linear model's decision_function method directly as the input of inverse_transform. 
""" - check_is_fitted(self, 'classes_') + check_is_fitted(self) if threshold is None: threshold = (self.pos_label + self.neg_label) / 2. @@ -911,7 +911,7 @@ def transform(self, y): A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in `y[i]`, and 0 otherwise. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) class_to_index = self._build_cache() yt = self._transform(y, class_to_index) @@ -976,7 +976,7 @@ def inverse_transform(self, yt): The set of labels for each sample such that `y[i]` consists of `classes_[j]` for each `yt[i, j] == 1`. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) if yt.shape[1] != len(self.classes_): raise ValueError('Expected indicator for {0} classes, but got {1}' diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 8297a42ab17f8..4f8c8af1283b2 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -402,7 +402,7 @@ def transform(self, X): """ X = check_array(X, accept_sparse=['csr', 'csc']) - check_is_fitted(self, 'components_') + check_is_fitted(self) if X.shape[1] != self.components_.shape[1]: raise ValueError( diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 4820af8cb2b69..704a075d95932 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -184,7 +184,7 @@ def predict_proba(self, X): Normalized probability distributions across class labels """ - check_is_fitted(self, 'X_') + check_is_fitted(self) X_2d = check_array(X, accept_sparse=['csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia']) diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 40f87baec06be..b2723cc7e0c2b 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -437,7 +437,7 @@ def _sparse_decision_function(self, X): self.probA_, self.probB_) def _validate_for_predict(self, X): - check_is_fitted(self, 'support_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C", accept_large_sparse=False) @@ -562,7 +562,7 @@ def predict(self, X): y_pred : array, shape (n_samples,) Class labels for samples in X. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) if self.break_ties and self.decision_function_shape == 'ovo': raise ValueError("break_ties must be False when " "decision_function_shape is 'ovo'") diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index 822dd0edb5501..6eb9c61ec2b2d 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -65,7 +65,7 @@ def fit(self, X, y=None, *args, **kwargs): return True def _check_fit(self): - check_is_fitted(self, 'coef_') + check_is_fitted(self) @hides def inverse_transform(self, X, *args, **kwargs): diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 69c3b1a8270b6..9f6bf979717cf 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -111,13 +111,13 @@ def get_depth(self): The depth of a tree is the maximum distance between the root and any leaf. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) return self.tree_.max_depth def get_n_leaves(self): """Returns the number of leaves of the decision tree. 
""" - check_is_fitted(self, 'tree_') + check_is_fitted(self) return self.tree_.n_leaves def fit(self, X, y, sample_weight=None, check_input=True, @@ -424,7 +424,7 @@ def predict(self, X, check_input=True): y : array of shape = [n_samples] or [n_samples, n_outputs] The predicted classes, or the predict values. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) X = self._validate_X_predict(X, check_input) proba = self.tree_.predict(X) n_samples = X.shape[0] @@ -478,7 +478,7 @@ def apply(self, X, check_input=True): ``[0; self.tree_.node_count)``, possibly with gaps in the numbering. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) X = self._validate_X_predict(X, check_input) return self.tree_.apply(X) @@ -520,7 +520,7 @@ def feature_importances_(self): ------- feature_importances_ : array, shape = [n_features] """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) return self.tree_.compute_feature_importances() @@ -841,7 +841,7 @@ class in a leaf. The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) X = self._validate_X_predict(X, check_input) proba = self.tree_.predict(X) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index b4bd2daac00d7..b93c66f7cfbb6 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -169,7 +169,7 @@ def fit(self, X, y): return self def predict(self, X): - check_is_fitted(self, 'coef_') + check_is_fitted(self) X = check_array(X) return np.ones(X.shape[0]) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index e1a1270f21e63..14b13d94ca5d1 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -632,34 +632,34 @@ def test_check_symmetric(): def test_check_is_fitted(): # Check is ValueError raised when non estimator instance passed - assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_") - assert_raises(TypeError, check_is_fitted, "SVR", "support_") + assert_raises(ValueError, check_is_fitted, ARDRegression) + assert_raises(TypeError, check_is_fitted, "SVR") ard = ARDRegression() svr = SVR() try: - assert_raises(NotFittedError, check_is_fitted, ard, "coef_") - assert_raises(NotFittedError, check_is_fitted, svr, "support_") + assert_raises(NotFittedError, check_is_fitted, ard) + assert_raises(NotFittedError, check_is_fitted, svr) except ValueError: assert False, "check_is_fitted failed with ValueError" # NotFittedError is a subclass of both ValueError and AttributeError try: - check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s") + check_is_fitted(ard, msg="Random message %(name)s, %(name)s") except ValueError as e: assert str(e) == "Random message ARDRegression, ARDRegression" try: - check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s") + check_is_fitted(svr, msg="Another message %(name)s, %(name)s") except AttributeError as e: assert str(e) == "Another message SVR, SVR" ard.fit(*make_blobs()) svr.fit(*make_blobs()) - assert check_is_fitted(ard, "coef_") is None - assert check_is_fitted(svr, "support_") is None + assert check_is_fitted(ard) is None + assert check_is_fitted(svr) is None def test_check_consistent_length(): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 2150a54ad75d9..b4e5bf8154e25 100644 --- a/sklearn/utils/validation.py +++ 
b/sklearn/utils/validation.py @@ -866,7 +866,7 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, attributes, msg=None, all_or_any=all): +def check_is_fitted(estimator, *, msg=None, all_or_any=all): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of @@ -910,10 +910,10 @@ def check_is_fitted(estimator, attributes, msg=None, all_or_any=all): if not hasattr(estimator, 'fit'): raise TypeError("%s is not an estimator instance." % (estimator)) - if not isinstance(attributes, (list, tuple)): - attributes = [attributes] + attrs = [v for v in vars(estimator) if v.endswith("_") + and not v.startswith("__")] - if not all_or_any([hasattr(estimator, attr) for attr in attributes]): + if not len(attrs): raise NotFittedError(msg % {'name': type(estimator).__name__}) From e034ed80e20536e38b913e851eeed1427f7ebcc0 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:07:18 -0400 Subject: [PATCH 34/86] cleanup, remove any_or_all --- sklearn/decomposition/base.py | 2 +- sklearn/discriminant_analysis.py | 2 +- .../_hist_gradient_boosting/gradient_boosting.py | 4 ++-- sklearn/ensemble/gradient_boosting.py | 10 +++++----- sklearn/kernel_approximation.py | 2 +- sklearn/linear_model/base.py | 4 ++-- sklearn/linear_model/stochastic_gradient.py | 2 +- sklearn/neighbors/base.py | 8 ++++---- sklearn/preprocessing/data.py | 4 ++-- sklearn/utils/validation.py | 12 +++--------- 10 files changed, 22 insertions(+), 28 deletions(-) diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index 0dad8c6130d68..2f11d8bd847b8 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -122,7 +122,7 @@ def transform(self, X): IncrementalPCA(batch_size=3, n_components=2) >>> ipca.transform(X) # doctest: +SKIP """ - check_is_fitted(self, all_or_any=all) + check_is_fitted(self) X = check_array(X) if self.mean_ is not None: diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 4a3542e204288..efe39b8c3fb9a 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -505,7 +505,7 @@ def transform(self, X): if self.solver == 'lsqr': raise NotImplementedError("transform not implemented for 'lsqr' " "solver (use 'svd' or 'eigen').") - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) X = check_array(X) if self.solver == 'svd': diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index dc040ed1fa409..e66b755964058 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -541,7 +541,7 @@ def _raw_predict(self, X): """ X = check_array(X, dtype=[X_DTYPE, X_BINNED_DTYPE], force_all_finite=False) - check_is_fitted(self, '_predictors') + check_is_fitted(self) if X.shape[1] != self.n_features_: raise ValueError( 'X has {} features but this estimator was trained with ' @@ -603,7 +603,7 @@ def _encode_y(self, y=None): @property def n_iter_(self): - check_is_fitted(self, '_predictors') + check_is_fitted(self) return len(self._predictors) def _more_tags(self): diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 11813855d01d8..43c4dae31f66e 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -115,7 +115,7 @@ def predict(self, X): y : array, shape 
(n_samples,) Returns predicted values. """ - check_is_fitted(self, 'quantile') + check_is_fitted(self) y = np.empty((X.shape[0], 1), dtype=np.float64) y.fill(self.quantile) @@ -158,7 +158,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'mean') + check_is_fitted(self) y = np.empty((X.shape[0], 1), dtype=np.float64) y.fill(self.mean) @@ -210,7 +210,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'prior') + check_is_fitted(self) y = np.empty((X.shape[0], 1), dtype=np.float64) y.fill(self.prior) @@ -262,7 +262,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'priors') + check_is_fitted(self) y = np.empty((X.shape[0], self.priors.shape[0]), dtype=np.float64) y[:] = self.priors @@ -316,7 +316,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'n_classes') + check_is_fitted(self) y = np.empty((X.shape[0], self.n_classes), dtype=np.float64) y.fill(0.0) diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 9d257427944dc..82cb37104cbff 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -350,7 +350,7 @@ def transform(self, X): """ msg = ("%(name)s is not fitted. Call fit to set the parameters before" " calling transform") - check_is_fitted(self, "sample_interval_", msg=msg) + check_is_fitted(self, msg=msg) X = check_array(X, accept_sparse='csr') sparse = sp.issparse(X) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index b408c8569529d..b36516e081392 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -327,7 +327,7 @@ def densify(self): self : estimator """ msg = "Estimator, %(name)s, must be fitted before densifying." - check_is_fitted(self, "coef_", msg=msg) + check_is_fitted(self, msg=msg) if sp.issparse(self.coef_): self.coef_ = self.coef_.toarray() return self @@ -357,7 +357,7 @@ def sparsify(self): self : estimator """ msg = "Estimator, %(name)s, must be fitted before sparsifying." - check_is_fitted(self, "coef_", msg=msg) + check_is_fitted(self, msg=msg) self.coef_ = sp.csr_matrix(self.coef_) return self diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index e80a6a7ec3ce4..50c91513c12db 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -1216,7 +1216,7 @@ def _decision_function(self, X): array, shape (n_samples,) Predicted target values per element in X. """ - check_is_fitted(self, all_or_any=all) + check_is_fitted(self) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 041c13aae5417..4f7ef38a4ae14 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -388,7 +388,7 @@ class from an array representing our data set and ask who's [2]]...) 
""" - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -543,7 +543,7 @@ def kneighbors_graph(self, X=None, n_neighbors=None, -------- NearestNeighbors.radius_neighbors_graph """ - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -691,7 +691,7 @@ class from an array representing our data set and ask who's For efficiency, `radius_neighbors` returns arrays of objects, where each object is a 1D array of indices or distances. """ - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if X is not None: query_is_train = False @@ -828,7 +828,7 @@ def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity'): -------- kneighbors_graph """ - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if X is not None: X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index e70c98e48e898..b3f09664f025d 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1206,7 +1206,7 @@ def transform(self, X): X : {array-like, sparse matrix} The data used to scale along the specified axis. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1229,7 +1229,7 @@ def inverse_transform(self, X): X : array-like The data used to scale along the specified axis. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index b4e5bf8154e25..48daa64da77ee 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -866,21 +866,18 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, *, msg=None, all_or_any=all): +def check_is_fitted(estimator, *, msg=None): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of - "all_or_any" of the passed attributes and raises a NotFittedError with the - given message. + fitted attributes (ending with a trailing underscore) and otherwise + raises a NotFittedError with the given message. Parameters ---------- estimator : estimator instance. estimator instance for which the check is performed. - attributes : attribute name(s) given as string or a list/tuple of strings - Eg.: - ``["coef_", "estimator_", ...], "coef_"`` msg : string The default error message is, "This %(name)s instance is not fitted @@ -891,9 +888,6 @@ def check_is_fitted(estimator, *, msg=None, all_or_any=all): Eg. : "Estimator, %(name)s, must be fitted before sparsifying". - all_or_any : callable, {all, any}, default all - Specify whether all or any of the given attributes must exist. 
- Returns ------- None From 1dc925854709a6a157d32ba4248edb1e9047b77f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:18:24 -0400 Subject: [PATCH 35/86] fix LOF, birch, mixtures --- sklearn/cluster/birch.py | 3 +-- sklearn/mixture/base.py | 13 +++++-------- sklearn/mixture/bayesian_mixture.py | 6 ------ sklearn/neighbors/lof.py | 6 ++---- 4 files changed, 8 insertions(+), 20 deletions(-) diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 941b833e977f7..11bb0f17a1dc6 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -534,8 +534,7 @@ def partial_fit(self, X=None, y=None): return self._fit(X) def _check_fit(self, X): - check_is_fitted(self, - all_or_any=any) + check_is_fitted(self) if (hasattr(self, 'subcluster_centers_') and X.shape[1] != self.subcluster_centers_.shape[1]): diff --git a/sklearn/mixture/base.py b/sklearn/mixture/base.py index 8920bef181226..26410fc5256af 100644 --- a/sklearn/mixture/base.py +++ b/sklearn/mixture/base.py @@ -15,6 +15,7 @@ from ..base import DensityMixin from ..exceptions import ConvergenceWarning from ..utils import check_array, check_random_state +from ..utils.validation import check_is_fitted from ..utils.fixes import logsumexp @@ -308,10 +309,6 @@ def _m_step(self, X, log_resp): """ pass - @abstractmethod - def _check_is_fitted(self): - pass - @abstractmethod def _get_parameters(self): pass @@ -334,7 +331,7 @@ def score_samples(self, X): log_prob : array, shape (n_samples,) Log probabilities of each data point in X. """ - self._check_is_fitted() + check_is_fitted(self) X = _check_X(X, None, self.means_.shape[1]) return logsumexp(self._estimate_weighted_log_prob(X), axis=1) @@ -369,7 +366,7 @@ def predict(self, X): labels : array, shape (n_samples,) Component labels. """ - self._check_is_fitted() + check_is_fitted(self) X = _check_X(X, None, self.means_.shape[1]) return self._estimate_weighted_log_prob(X).argmax(axis=1) @@ -388,7 +385,7 @@ def predict_proba(self, X): Returns the probability each Gaussian (state) in the model given each sample. """ - self._check_is_fitted() + check_is_fitted(self) X = _check_X(X, None, self.means_.shape[1]) _, log_resp = self._estimate_log_prob_resp(X) return np.exp(log_resp) @@ -410,7 +407,7 @@ def sample(self, n_samples=1): Component labels """ - self._check_is_fitted() + check_is_fitted(self) if n_samples < 1: raise ValueError( diff --git a/sklearn/mixture/bayesian_mixture.py b/sklearn/mixture/bayesian_mixture.py index 88c0ab66ae20a..b0cc600d077da 100644 --- a/sklearn/mixture/bayesian_mixture.py +++ b/sklearn/mixture/bayesian_mixture.py @@ -646,12 +646,6 @@ def _estimate_wishart_spherical(self, nk, xk, sk): # Contrary to the original bishop book, we normalize the covariances self.covariances_ /= self.degrees_of_freedom_ - def _check_is_fitted(self): - check_is_fitted(self, ['weight_concentration_', 'mean_precision_', - 'means_', 'degrees_of_freedom_', - 'covariances_', 'precisions_', - 'precisions_cholesky_']) - def _m_step(self, X, log_resp): """M step. diff --git a/sklearn/neighbors/lof.py b/sklearn/neighbors/lof.py index a58997502be91..f4f697565cd3e 100644 --- a/sklearn/neighbors/lof.py +++ b/sklearn/neighbors/lof.py @@ -313,8 +313,7 @@ def _predict(self, X=None): is_inlier : array, shape (n_samples,) Returns -1 for anomalies/outliers and +1 for inliers. 
""" - check_is_fitted(self, ["offset_", "negative_outlier_factor_", - "n_neighbors_", "_distances_fit_X_"]) + check_is_fitted(self) if X is not None: X = check_array(X, accept_sparse='csr') @@ -454,8 +453,7 @@ def _score_samples(self, X): The opposite of the Local Outlier Factor of each input samples. The lower, the more abnormal. """ - check_is_fitted(self, ["offset_", "negative_outlier_factor_", - "_distances_fit_X_"]) + check_is_fitted(self) X = check_array(X, accept_sparse='csr') distances_X, neighbors_indices_X = ( From 92d1aaf596745c2abe7a3bce9484d34852505d92 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 1 Aug 2019 19:27:04 +0200 Subject: [PATCH 36/86] iter --- sklearn/utils/__init__.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 9e6396e7b5ba1..3ea68ac4ef470 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -194,11 +194,21 @@ def _array_indexing(array, key, axis): def _pandas_indexing(X, key, axis, by_name): """Index a pandas dataframe or a series.""" - if hasattr(key, 'flags'): + if hasattr(key, 'shape'): # Work-around for indexing with read-only key in pandas key = key if key.flags.writeable else key.copy() indexer = 'loc' if by_name else 'iloc' - return getattr(X, indexer)[:, key] if axis else getattr(X, indexer)[key] + try: + return (getattr(X, indexer)[:, key] + if axis else getattr(X, indexer)[key]) + except ValueError: + # Cython typed memoryviews internally used in pandas do not support + # readonly buffers. + warnings.warn( + "Copying input dataframe for slicing.", DataConversionWarning + ) + return (getattr(X.copy(), indexer)[:, key] + if axis else getattr(X.copy(), indexer)[key]) def _list_indexing(X, key): From d6034ea6eb4d8f07be8e2476e9480f7d4a8ff797 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:29:58 -0400 Subject: [PATCH 37/86] remove unused method --- sklearn/mixture/gaussian_mixture.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py index 610af54cc343a..b7941365b2609 100644 --- a/sklearn/mixture/gaussian_mixture.py +++ b/sklearn/mixture/gaussian_mixture.py @@ -686,9 +686,6 @@ def _estimate_log_weights(self): def _compute_lower_bound(self, _, log_prob_norm): return log_prob_norm - def _check_is_fitted(self): - check_is_fitted(self) - def _get_parameters(self): return (self.weights_, self.means_, self.covariances_, self.precisions_cholesky_) From b1918e83de705b97b789d970ecd471903141182c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 2 Aug 2019 15:33:41 +0200 Subject: [PATCH 38/86] address different comments --- doc/whats_new/v0.22.rst | 4 ++-- sklearn/compose/tests/test_column_transformer.py | 2 +- sklearn/utils/__init__.py | 2 +- sklearn/utils/tests/test_utils.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index db9e0e574da06..0f3c5665e3aa6 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -65,8 +65,8 @@ Changelog ...................... - |Fix| Fixed a bug in :class:`compose.ColumnTransformer` which failed to - select the proper columns when using a boolean list and NumPy older than - 1.13. + select the proper columns when using a boolean list, with NumPy older than + 1.12. :pr:`14510` by :user:`Guillaume Lemaitre `. 
:mod:`sklearn.datasets` diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index a667b35cf65e3..d28a82374ad5b 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -1114,7 +1114,7 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname): @pytest.mark.parametrize("array_type", [np.asarray, sparse.csr_matrix]) def test_column_transformer_mask_indexing(array_type): # Regression test for #14510 - # Boolean array-like does not behave as boolean array with NumPy < 1.13 + # Boolean array-like does not behave as boolean array with NumPy < 1.12 # and sparse matrices as well X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]]) X = array_type(X) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 3b4a20d08716b..f95a0d6cccc57 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -233,7 +233,7 @@ def _array_indexing(array, key, axis=0): "'axis' should be either 0 (to index rows) or 1 (to index " " column). Got {} instead.".format(axis) ) - if np_version < (1, 13) or issparse(array): + if np_version < (1, 12) or issparse(array): # check if we have an boolean array-likes to make the proper indexing key_array = np.asarray(key) if np.issubdtype(key_array.dtype, np.bool_): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 49f50eedc0a42..806295f1aae28 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -371,7 +371,7 @@ def test_safe_indexing_mock_pandas(asarray): def test_safe_indexing_mask_axis_1(array_type): # regression test for #14510 # check that boolean array-like and boolean array lead to the same indexing - # even in NumPy < 1.13 + # even in NumPy < 1.12 if array_type == 'array': array_constructor = np.asarray elif array_type == 'sparse': From 6322f99d5d85918643e2ba1b86539cb654d8cb60 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 2 Aug 2019 15:54:55 +0200 Subject: [PATCH 39/86] iter --- sklearn/utils/__init__.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 19e5e78b27cf3..638dc2e6f64fc 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -185,6 +185,7 @@ def axis0_safe_slice(X, mask, len_mask): def _array_indexing(array, key, axis): """Index an array consistently across NumPy version.""" if np_version < (1, 12) or issparse(array): + # FIXME: Remove the check for NumPy when using >= 1.12 # check if we have an boolean array-likes to make the proper indexing key_array = np.asarray(key) if np.issubdtype(key_array.dtype, np.bool_): @@ -196,19 +197,11 @@ def _pandas_indexing(X, key, axis, by_name): """Index a pandas dataframe or a series.""" if hasattr(key, 'shape'): # Work-around for indexing with read-only key in pandas + # FIXME: solved in pandas 0.25 + key = np.asarray(key) key = key if key.flags.writeable else key.copy() indexer = 'loc' if by_name else 'iloc' - try: - return (getattr(X, indexer)[:, key] - if axis else getattr(X, indexer)[key]) - except ValueError: - # Cython typed memoryviews internally used in pandas do not support - # readonly buffers. 
- warnings.warn( - "Copying input dataframe for slicing.", DataConversionWarning - ) - return (getattr(X.copy(), indexer)[:, key] - if axis else getattr(X.copy(), indexer)[key]) + return (getattr(X, indexer)[:, key] if axis else getattr(X, indexer)[key]) def _list_indexing(X, key): From e478e207cf377b287c5d938a6af0f3314189dd41 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 2 Aug 2019 16:48:16 +0200 Subject: [PATCH 40/86] iter --- build_tools/azure/test_script.sh | 2 +- sklearn/inspection/partial_dependence.py | 1 + sklearn/inspection/tests/test_partial_dependence.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh index 27a3e4b649a13..6e27915e0be6b 100755 --- a/build_tools/azure/test_script.sh +++ b/build_tools/azure/test_script.sh @@ -21,7 +21,7 @@ except ImportError: python -c "import multiprocessing as mp; print('%d CPUs' % mp.cpu_count())" pip list -TEST_CMD="python -m pytest --verbose --showlocals --durations=20 --junitxml=$JUNITXML" +TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML" if [[ "$COVERAGE" == "true" ]]; then export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc" diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index cb3a3d986dd44..4e219dbb77418 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -359,6 +359,7 @@ def partial_dependence(estimator, X, features, response_method='auto', fitted_attribute = 'n_iter_' check_is_fitted(estimator, fitted_attribute, msg=msg) + print(features) features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' ).ravel() diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index bc98db923449c..6b93e8e427a16 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -341,15 +341,15 @@ def test_partial_dependence_error(estimator, params, err_msg): partial_dependence(estimator, X, **params) +@pytest.mark.parametrize("features", [-1, 100000]) @pytest.mark.parametrize( 'estimator', [LinearRegression(), GradientBoostingClassifier(random_state=0)] ) -def test_partial_dependence_unknown_feature_indices(estimator): +def test_partial_dependence_unknown_feature_indices(features, estimator): X, y = make_classification(random_state=0) estimator.fit(X, y) - features = 100000 err_msg = 'all features must be in' with pytest.raises(ValueError, match=err_msg): partial_dependence(estimator, X, [features]) From 4d4cc2db23c47601ae9d0af53cdca5e80330b3e2 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 2 Aug 2019 17:12:41 +0200 Subject: [PATCH 41/86] update error message --- sklearn/utils/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 638dc2e6f64fc..d85d755c29eb4 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -340,7 +340,8 @@ def _get_column_indices(X, key): idx = safe_indexing(np.arange(n_columns), key) except IndexError as e: raise ValueError( - 'all features must be in [0, %d]' % (n_columns - 1) + 'all features must be in [0, {}] or [-{}, 0]' + .format(n_columns - 1, n_columns) ) from e return np.atleast_1d(idx).tolist() elif _check_key_type(key, str): From 3cb95ac2d76ecf3c494446d44838b88347e5f1e8 Mon Sep 17 00:00:00 2001 From: 
Andreas Mueller Date: Fri, 2 Aug 2019 14:43:30 -0400 Subject: [PATCH 42/86] fix partial dependence function --- sklearn/ensemble/partial_dependence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index 594ed39568c27..cce79342c393f 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -138,7 +138,7 @@ def partial_dependence(gbrt, target_variables, grid=None, X=None, """ if not isinstance(gbrt, BaseGradientBoosting): raise ValueError('gbrt has to be an instance of BaseGradientBoosting') - check_is_fitted(gbrt, 'estimators_') + check_is_fitted(gbrt) if (grid is None and X is None) or (grid is not None and X is not None): raise ValueError('Either grid or X must be specified') @@ -270,7 +270,7 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None, if not isinstance(gbrt, BaseGradientBoosting): raise ValueError('gbrt has to be an instance of BaseGradientBoosting') - check_is_fitted(gbrt, 'estimators_') + check_is_fitted(gbrt) # set label_idx for multi-class GBRT if hasattr(gbrt, 'classes_') and np.size(gbrt.classes_) > 2: From 4d3a8b47ac98bc897c7675e21a2c8ac0d8571750 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 14:53:17 -0400 Subject: [PATCH 43/86] make change backward-compatible --- sklearn/utils/tests/test_validation.py | 5 +++++ sklearn/utils/validation.py | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 14b13d94ca5d1..ec812b64938bc 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -661,6 +661,11 @@ def test_check_is_fitted(): assert check_is_fitted(ard) is None assert check_is_fitted(svr) is None + assert_warns_message( + DeprecationWarning, + "Passing attributes to check_is_fitted is deprecated", + check_is_fitted, ard, ['coef_']) + def test_check_consistent_length(): check_consistent_length([1], [2], [3], [4], [5]) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 48daa64da77ee..558c3e9de060f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -155,7 +155,6 @@ def _num_samples(x): raise TypeError(message) - def check_memory(memory): """Check that ``memory`` is joblib.Memory-like. @@ -866,7 +865,7 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, *, msg=None): +def check_is_fitted(estimator, attributes='deprecated', msg=None): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of @@ -897,6 +896,10 @@ def check_is_fitted(estimator, *, msg=None): NotFittedError If the attributes are not found. """ + if attributes != 'deprecated': + warnings.warn("Passing attributes to check_is_fitted is deprecated" + "and will be removed in 0.23. The attributes " + "argument is ignored.", DeprecationWarning) if msg is None: msg = ("This %(name)s instance is not fitted yet. 
Call 'fit' with " "appropriate arguments before using this method.") From 1181982935808ceb7b68ff697baed702471a7d9b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 15:25:36 -0400 Subject: [PATCH 44/86] also allow private fitted attributes --- sklearn/utils/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 558c3e9de060f..d0fe8276d853f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -907,7 +907,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): if not hasattr(estimator, 'fit'): raise TypeError("%s is not an estimator instance." % (estimator)) - attrs = [v for v in vars(estimator) if v.endswith("_") + attrs = [v for v in vars(estimator) + if (v.endswith("_") or v.startswith("_")) and not v.startswith("__")] if not len(attrs): From 7ed876d57c149b763fa4ac4d93c7c62f29e46446 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 15:48:57 -0400 Subject: [PATCH 45/86] slight refactoring in CountVectorizer to mess less with the vocabulary --- sklearn/feature_extraction/text.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 01a7b70587f3d..4944d23200418 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -32,7 +32,7 @@ from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES from ..utils import _IS_32BIT from ..utils.fixes import _astype_copy_false -from ..exceptions import ChangedBehaviorWarning +from ..exceptions import ChangedBehaviorWarning, NotFittedError __all__ = ['HashingVectorizer', @@ -450,9 +450,11 @@ def _validate_vocabulary(self): self.fixed_vocabulary_ = False def _check_vocabulary(self): - """Check if vocabulary is empty or missing (not fit-ed)""" - msg = "%(name)s - Vocabulary wasn't fitted." 
- check_is_fitted(self, msg=msg), + """Check if vocabulary is empty or missing (not fitted)""" + if not hasattr(self, 'vocabulary_'): + self._validate_vocabulary() + if not self.fixed_vocabulary_: + raise NotFittedError("Vocabulary not fitted or provided") if len(self.vocabulary_) == 0: raise ValueError("Vocabulary is empty") @@ -1172,10 +1174,6 @@ def transform(self, raw_documents): raise ValueError( "Iterable over raw text documents expected, " "string object received.") - - if not hasattr(self, 'vocabulary_'): - self._validate_vocabulary() - self._check_vocabulary() # use the same matrix-building strategy as fit_transform @@ -1216,8 +1214,6 @@ def inverse_transform(self, X): def get_feature_names(self): """Array mapping from feature integer indices to feature name""" - if not hasattr(self, 'vocabulary_'): - self._validate_vocabulary() self._check_vocabulary() From 8701cc0e07a9d070e8d823edbc675a06453e5db8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 15:52:42 -0400 Subject: [PATCH 46/86] added regression test for not being able to call inverse_transform before transform --- sklearn/feature_extraction/tests/test_text.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 2bc1ad25bca63..e3be2c27955b6 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -261,6 +261,10 @@ def test_countvectorizer_custom_vocabulary(): assert set(vect.vocabulary_) == terms X = vect.transform(JUNK_FOOD_DOCS) assert X.shape[1] == len(terms) + v = typ(vocab) + vect = CountVectorizer(vocabulary=v) + inv = vect.inverse_transform(X) + assert len(inv) == X.shape[0] def test_countvectorizer_custom_vocabulary_pipeline(): From be4a90f0bf82d684f53e3bb405b7b51be115c83e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 16:16:35 -0400 Subject: [PATCH 47/86] add special check for classes --- sklearn/utils/tests/test_validation.py | 4 ++-- sklearn/utils/validation.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index ec812b64938bc..552bd85e91b99 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -631,8 +631,8 @@ def test_check_symmetric(): def test_check_is_fitted(): - # Check is ValueError raised when non estimator instance passed - assert_raises(ValueError, check_is_fitted, ARDRegression) + # Check is TypeError raised when non estimator instance passed + assert_raises(TypeError, check_is_fitted, ARDRegression) assert_raises(TypeError, check_is_fitted, "SVR") ard = ARDRegression() diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index d0fe8276d853f..06604bb583f66 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -14,7 +14,7 @@ import numpy as np import scipy.sparse as sp from distutils.version import LooseVersion -from inspect import signature +from inspect import signature, isclass from numpy.core.numeric import ComplexWarning import joblib @@ -900,6 +900,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): warnings.warn("Passing attributes to check_is_fitted is deprecated" "and will be removed in 0.23. 
The attributes " "argument is ignored.", DeprecationWarning) + if isclass(estimator): + raise TypeError("{} is a class, not an instance.".format(estimator)) if msg is None: msg = ("This %(name)s instance is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.") From b62933d25f1ae6177661bc5b956ec981cf98ebef Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Aug 2019 12:33:57 +0200 Subject: [PATCH 48/86] address comments --- sklearn/inspection/partial_dependence.py | 13 +++--- .../tests/test_partial_dependence.py | 40 ++++++++++++------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 4e219dbb77418..495fbb9bd7deb 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -352,14 +352,13 @@ def partial_dependence(estimator, X, features, response_method='auto', "With the 'recursion' method, the response_method must be " "'decision_function'. Got {}.".format(response_method) ) - msg = "'estimator' parameter must be a fitted estimator" - if isinstance(estimator, BaseGradientBoosting): - fitted_attribute = 'estimators_' - else: - fitted_attribute = 'n_iter_' - check_is_fitted(estimator, fitted_attribute, msg=msg) + # msg = "'estimator' parameter must be a fitted estimator" + # if isinstance(estimator, BaseGradientBoosting): + # fitted_attribute = 'estimators_' + # else: + # fitted_attribute = 'n_iter_' + # check_is_fitted(estimator, fitted_attribute, msg=msg) - print(features) features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' ).ravel() diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 6b93e8e427a16..ca85db1a0d17d 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -341,15 +341,15 @@ def test_partial_dependence_error(estimator, params, err_msg): partial_dependence(estimator, X, **params) -@pytest.mark.parametrize("features", [-1, 100000]) @pytest.mark.parametrize( 'estimator', [LinearRegression(), GradientBoostingClassifier(random_state=0)] ) -def test_partial_dependence_unknown_feature_indices(features, estimator): +def test_partial_dependence_unknown_feature_indices(estimator): X, y = make_classification(random_state=0) estimator.fit(X, y) + features = 100000 err_msg = 'all features must be in' with pytest.raises(ValueError, match=err_msg): partial_dependence(estimator, X, [features]) @@ -449,10 +449,11 @@ def test_partial_dependence_pipeline(): features = 0 pdp_pipe, values_pipe = partial_dependence( - pipe, iris.data, features=[features] + pipe, iris.data, features=[features], grid_resolution=10 ) pdp_clf, values_clf = partial_dependence( - clf, scaler.transform(iris.data), features=[features] + clf, scaler.transform(iris.data), features=[features], + grid_resolution=10 ) assert_allclose(pdp_pipe, pdp_clf) assert_allclose( @@ -487,7 +488,9 @@ def test_partial_dependence_dataframe(estimator, preprocessor, features): pipe = make_pipeline(preprocessor, estimator) pipe.fit(df, iris.target) - pdp_pipe, values_pipe = partial_dependence(pipe, df, features=features) + pdp_pipe, values_pipe = partial_dependence( + pipe, df, features=features, grid_resolution=10 + ) # the column transformer will reorder the column when transforming # we mixed the index to be sure that we are computing the partial @@ -501,7 +504,7 @@ def 
test_partial_dependence_dataframe(estimator, preprocessor, features): clf = clone(estimator).fit(X_proc, iris.target) pdp_clf, values_clf = partial_dependence( - clf, X_proc, features=features_clf, method='brute' + clf, X_proc, features=features_clf, method='brute', grid_resolution=10 ) assert_allclose(pdp_pipe, pdp_clf) @@ -516,13 +519,16 @@ def test_partial_dependence_dataframe(estimator, preprocessor, features): @pytest.mark.parametrize( - "features", - [0, iris.feature_names[0], - [0, 2], [iris.feature_names[i] for i in (0, 2)], - slice(0, 2, 1), [True, False, True, False]], + "features, expected_pd_shape", + [(0, (3, 10)), + (iris.feature_names[0], (3, 10)), + ([0, 2], (3, 10, 10)), + ([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)), + (slice(0, 2, 1), (3, 10, 10)), + ([True, False, True, False], (3, 10, 10))], ids=['scalar-int', 'scalar-str', 'list-int', 'list-str', 'slice', 'mask'] ) -def test_partial_dependence_feature_type(features): +def test_partial_dependence_feature_type(features, expected_pd_shape): # check all possible features type supported in PDP pd = pytest.importorskip("pandas") df = pd.DataFrame(iris.data, columns=iris.feature_names) @@ -535,7 +541,11 @@ def test_partial_dependence_feature_type(features): preprocessor, LogisticRegression(max_iter=1000, random_state=0) ) pipe.fit(df, iris.target) - pdp_pipe, values_pipe = partial_dependence(pipe, df, features=features) + pdp_pipe, values_pipe = partial_dependence( + pipe, df, features=features, grid_resolution=10 + ) + assert pdp_pipe.shape == expected_pd_shape + assert len(values_pipe) == len(pdp_pipe.shape) - 1 def test_plot_partial_dependence(pyplot): @@ -647,10 +657,10 @@ def test_plot_partial_dependence_multioutput(pyplot): (multioutput_regression_data[0], {"target": 100, 'features': [0]}, r'target must be in \[0, n_tasks\]'), (make_classification(random_state=0), - {'features': ['foobar'], 'feature_names': None}, - 'Feature foobar not in feature_names'), + {'features': ['foobar'], 'feature_names': None}, + 'Feature foobar not in feature_names'), (make_classification(random_state=0), - {'features': ['foobar'], 'feature_names': ['abcd', 'def']}, + {'features': ['foobar'], 'feature_names': ['abcd', 'def']}, 'Feature foobar not in feature_names'), (make_classification(random_state=0), {'features': [(1, 2, 3)]}, 'Each entry in features must be either an int, '), From 7e330276fa23ce04f1489b7e4ac99ec0eead7072 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 5 Aug 2019 11:28:49 -0400 Subject: [PATCH 49/86] more functions to fix --- sklearn/tree/export.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 99b5e54a4c9b7..8e2e0a486b756 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -11,7 +11,6 @@ # Li Li # Giuseppe Vettigli # License: BSD 3 clause -import warnings from io import StringIO from numbers import Integral @@ -743,7 +742,7 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, 'digraph Tree {... 
""" - check_is_fitted(decision_tree, 'tree_') + check_is_fitted(decision_tree) own_file = False return_string = False try: @@ -849,7 +848,7 @@ def export_text(decision_tree, feature_names=None, max_depth=10, | |--- petal width (cm) > 1.75 | | |--- class: 2 """ - check_is_fitted(decision_tree, 'tree_') + check_is_fitted(decision_tree) tree_ = decision_tree.tree_ class_names = decision_tree.classes_ right_child_fmt = "{} {} <= {}\n" From 82fbc6f6f26ffe8e0ee1a9654685d348910ccce6 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Aug 2019 18:45:12 +0200 Subject: [PATCH 50/86] address almost all comments --- sklearn/inspection/partial_dependence.py | 26 +++++-------- .../tests/test_partial_dependence.py | 38 +++++++++++++------ sklearn/utils/validation.py | 23 +++++------ 3 files changed, 48 insertions(+), 39 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 495fbb9bd7deb..919be3832812e 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -145,7 +145,7 @@ def _partial_dependence_brute(est, grid, features, X, response_method): # (n_points,) for non-multioutput regressors # (n_points, n_tasks) for multioutput regressors # (n_points, 1) for the regressors in cross_decomposition (I think) - # (n_points, 2) for binary classifaction + # (n_points, 2) for binary classification # (n_points, n_classes) for multiclass classification # average over samples @@ -289,17 +289,15 @@ def partial_dependence(estimator, X, features, response_method='auto', """ if not (is_classifier(estimator) or is_regressor(estimator)): raise ValueError( - "'estimator' must be a fitted regressor or classifier.") + "'estimator' must be a fitted regressor or classifier." + ) + check_is_fitted(estimator) - if is_classifier(estimator): - if not hasattr(estimator, 'classes_'): - raise ValueError( - "'estimator' parameter must be a fitted estimator" - ) - if isinstance(estimator.classes_[0], np.ndarray): - raise ValueError( - 'Multiclass-multioutput estimators are not supported' - ) + if (is_classifier(estimator) and + isinstance(estimator.classes_[0], np.ndarray)): + raise ValueError( + 'Multiclass-multioutput estimators are not supported' + ) if not(hasattr(X, '__array__') or sparse.issparse(X)): X = check_array(X, force_all_finite='allow-nan', dtype=np.object) @@ -352,12 +350,6 @@ def partial_dependence(estimator, X, features, response_method='auto', "With the 'recursion' method, the response_method must be " "'decision_function'. 
Got {}.".format(response_method) ) - # msg = "'estimator' parameter must be a fitted estimator" - # if isinstance(estimator, BaseGradientBoosting): - # fitted_attribute = 'estimators_' - # else: - # fitted_attribute = 'n_iter_' - # check_is_fitted(estimator, fitted_attribute, msg=msg) features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index ca85db1a0d17d..1b8bc0260c119 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -33,8 +33,11 @@ from sklearn.pipeline import make_pipeline from sklearn.dummy import DummyClassifier from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.exceptions import NotFittedError +from sklearn.utils.testing import all_estimators from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal +from sklearn.utils.testing import SkipTest # toy sample @@ -349,7 +352,7 @@ def test_partial_dependence_unknown_feature_indices(estimator): X, y = make_classification(random_state=0) estimator.fit(X, y) - features = 100000 + features = 10000 err_msg = 'all features must be in' with pytest.raises(ValueError, match=err_msg): partial_dependence(estimator, X, [features]) @@ -371,16 +374,6 @@ def test_partial_dependence_unknown_feature_string(estimator): partial_dependence(estimator, df, [features]) -@pytest.mark.parametrize( - 'estimator', - [LinearRegression(), GradientBoostingClassifier(random_state=0)] -) -def test_partial_dependence_unfitted_estimator(estimator): - err_msg = "'estimator' parameter must be a fitted estimator" - with pytest.raises(ValueError, match=err_msg): - partial_dependence(estimator, X, [0]) - - @pytest.mark.parametrize( 'estimator', [LinearRegression(), GradientBoostingClassifier(random_state=0)] @@ -548,6 +541,29 @@ def test_partial_dependence_feature_type(features, expected_pd_shape): assert len(values_pipe) == len(pdp_pipe.shape) - 1 +@pytest.mark.parametrize( + "name, Estimator", all_estimators(type_filter=['classifier', 'regressor']) +) +def test_partial_dependence_unfitted(name, Estimator): + try: + estimator = Estimator() + except TypeError: + raise SkipTest( + 'The {} estimator cannot be built with default parameters' + .format(name) + ) + + X, y = iris.data, iris.target + preprocessor = make_column_transformer( + (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3]) + ) + pipe = make_pipeline(preprocessor, estimator) + with pytest.raises(NotFittedError, match="is not fitted yet"): + partial_dependence(pipe, X, features=[0, 2], grid_resolution=10) + with pytest.raises(NotFittedError, match="is not fitted yet"): + partial_dependence(estimator, X, features=[0, 2], grid_resolution=10) + + def test_plot_partial_dependence(pyplot): # Test partial dependence plot function. boston = load_boston() diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 06604bb583f66..12561d1fed450 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -865,7 +865,7 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, attributes='deprecated', msg=None): +def check_is_fitted(estimator, msg=None): """Perform is_fitted validation for estimator. 
Checks if the estimator is fitted by verifying the presence of @@ -877,7 +877,6 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): estimator : estimator instance. estimator instance for which the check is performed. - msg : string The default error message is, "This %(name)s instance is not fitted yet. Call 'fit' with appropriate arguments before using this method." @@ -896,10 +895,6 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): NotFittedError If the attributes are not found. """ - if attributes != 'deprecated': - warnings.warn("Passing attributes to check_is_fitted is deprecated" - "and will be removed in 0.23. The attributes " - "argument is ignored.", DeprecationWarning) if isclass(estimator): raise TypeError("{} is a class, not an instance.".format(estimator)) if msg is None: @@ -909,12 +904,18 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): if not hasattr(estimator, 'fit'): raise TypeError("%s is not an estimator instance." % (estimator)) - attrs = [v for v in vars(estimator) - if (v.endswith("_") or v.startswith("_")) - and not v.startswith("__")] + from ..pipeline import Pipeline + if isinstance(estimator, Pipeline): + for est in estimator: + if est is not None: + check_is_fitted(est) + else: + attrs = [v for v in vars(estimator) + if (v.endswith("_") or v.startswith("_")) + and not v.startswith("__")] - if not len(attrs): - raise NotFittedError(msg % {'name': type(estimator).__name__}) + if not attrs: + raise NotFittedError(msg % {'name': type(estimator).__name__}) def check_non_negative(X, whom): From 18c8b551d37c331d7f98c12ccec6dad82d8fd710 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Aug 2019 18:47:48 +0200 Subject: [PATCH 51/86] PEP8 --- sklearn/inspection/tests/test_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 1b8bc0260c119..1a7d4ca0830f1 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -553,7 +553,7 @@ def test_partial_dependence_unfitted(name, Estimator): .format(name) ) - X, y = iris.data, iris.target + X = iris.data preprocessor = make_column_transformer( (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3]) ) From 1587bfea6a6e58815c2f5df697258a25fdcc72b4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 12 Sep 2019 17:25:40 +0200 Subject: [PATCH 52/86] fix merge conflict error --- doc/whats_new/v0.22.rst | 6 ------ sklearn/inspection/partial_dependence.py | 9 +-------- sklearn/utils/__init__.py | 1 - 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index c14cccdfd43c5..4a4e22718c529 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -450,12 +450,6 @@ Changelog ``decision_function_shape='ovr'``, and the number of target classes > 2. :pr:`12557` by `Adrin Jalali`_. -:mod:`sklearn.feature_selection` -................................ -- |Fix| Fixed a bug where :class:`VarianceThreshold` with `threshold=0` did not - remove constant features due to numerical instability, by using range - rather than variance in this case. - :pr:`13704` by `Roddy MacSween `. - |Enhancement| SVM estimators now throw a more specific error when `kernel='precomputed'` and fit on non-square data. :pr:`14336` by :user:`Gregory Dexter `. 
diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 0c4adaf081133..d692a3b5917b3 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -392,17 +392,10 @@ def plot_partial_dependence(estimator, X, features, feature_names=None, Parameters ---------- estimator : BaseEstimator -<<<<<<< HEAD - A fitted estimator object implementing `predict`, `predict_proba`, - or `decision_function`. Multioutput-multiclass classifiers are not - supported. - X : array-like or DataFrame, shape (n_samples, n_features) -======= A fitted estimator object implementing :term:`predict`, :term:predict_proba`, or :term:`decision_function`. Multioutput-multiclass classifiers are not supported. - X : array-like, shape (n_samples, n_features) ->>>>>>> origin/master + X : {array-like or dataframe} of shape (n_samples, n_features) The data to use to build the grid of values on which the dependence will be evaluated. This is usually the training data. features : list of {int, str, pair of int, pair of str} diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 734c3d2bd8ccf..c26fab41bfc93 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1,7 +1,6 @@ """ The :mod:`sklearn.utils` module includes various utilities. """ -from collections.abc import Iterable from collections.abc import Sequence from contextlib import contextmanager from itertools import compress From 8a887cace4254157a60716b95634d723104cd495 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 12 Sep 2019 17:32:18 +0200 Subject: [PATCH 53/86] handle pipeline in partial dependence function --- sklearn/inspection/partial_dependence.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index d692a3b5917b3..f85a2ba97e058 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -15,6 +15,7 @@ from joblib import Parallel, delayed from ..base import is_classifier, is_regressor +from ..pipeline import Pipeline from ..utils.extmath import cartesian from ..utils import check_array from ..utils import check_matplotlib_support # noqa @@ -291,7 +292,13 @@ def partial_dependence(estimator, X, features, response_method='auto', raise ValueError( "'estimator' must be a fitted regressor or classifier." 
) - check_is_fitted(estimator) + + if isinstance(estimator, Pipeline): + for est in estimator: + if est not in (None, 'drop'): + check_is_fitted(est) + else: + check_is_fitted(estimator) if (is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray)): From b6e6a4456ac02eefb30b5188a14a58b7fa65e28a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 12 Sep 2019 22:29:17 +0200 Subject: [PATCH 54/86] drop support for negative int indexing --- sklearn/inspection/partial_dependence.py | 15 ++++++++++++++- .../inspection/tests/test_partial_dependence.py | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index f85a2ba97e058..69f5a28377a5a 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -20,6 +20,7 @@ from ..utils import check_array from ..utils import check_matplotlib_support # noqa from ..utils import safe_indexing +from ..utils import _determine_key_type from ..utils import _get_column_indices from ..utils.validation import check_is_fitted from ..tree._tree import DTYPE @@ -191,7 +192,7 @@ def partial_dependence(estimator, X, features, response_method='auto', ``X`` is used both to generate a grid of values for the ``features``, and to compute the averaged predictions when method is 'brute'. - features : list or array-like of int + features : array-like of {int, str} The target features for which the partial dependency should be computed. response_method : 'auto', 'predict_proba' or 'decision_function', \ @@ -358,6 +359,18 @@ def partial_dependence(estimator, X, features, response_method='auto', "'decision_function'. Got {}.".format(response_method) ) + if _determine_key_type(features) == 'int': + raise_err = False + if isinstance(features, Iterable): + raise_err = np.all(np.less(features, 0)) + elif isinstance(features, numbers.Integral): + raise_err = features < 0 + + if raise_err: + raise ValueError( + 'all features must be in [0, {}]'.format(X.shape[0] - 1) + ) + features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' ).ravel() diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 845c8f3098404..4465a5c8218e8 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -348,11 +348,11 @@ def test_partial_dependence_error(estimator, params, err_msg): 'estimator', [LinearRegression(), GradientBoostingClassifier(random_state=0)] ) -def test_partial_dependence_unknown_feature_indices(estimator): +@pytest.mark.parametrize('features', [-1, 10000]) +def test_partial_dependence_unknown_feature_indices(estimator, features): X, y = make_classification(random_state=0) estimator.fit(X, y) - features = 10000 err_msg = 'all features must be in' with pytest.raises(ValueError, match=err_msg): partial_dependence(estimator, X, [features]) From 9dbfea52f6ff420049c78209d05c7e130b60451c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 13 Sep 2019 09:43:46 +0200 Subject: [PATCH 55/86] TST check dataframe are supported in plot_partial_dependence --- .../inspection/tests/test_partial_dependence.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 4465a5c8218e8..36d63cf48e062 100644 --- 
a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -711,3 +711,19 @@ def test_plot_partial_dependence_fig(pyplot): clf, X, [0, 1], target=0, grid_resolution=grid_resolution, fig=fig) assert pyplot.gcf() is fig + + +def test_plot_partial_dependence_dataframe(pyplot): + pd = pytest.importorskip('pandas') + boston = load_boston() + df = pd.DataFrame(boston.data, columns=boston.feature_names) + y = boston.target + + grid_resolution = 25 + + clf = HistGradientBoostingRegressor(max_iter=50) + clf.fit(df, y) + plot_partial_dependence( + clf, df, ['TAX', 'AGE'], grid_resolution=grid_resolution, + feature_names=df.columns.tolist() + ) From 33865c82a01b9533a6a9cd0b82533bf23cc09d7b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 16 Sep 2019 14:18:22 +0200 Subject: [PATCH 56/86] Update sklearn/inspection/partial_dependence.py Co-Authored-By: Nicolas Hug --- sklearn/inspection/partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 2dce71edc8d91..f7cd230b7a637 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -368,7 +368,7 @@ def partial_dependence(estimator, X, features, response_method='auto', if raise_err: raise ValueError( - 'all features must be in [0, {}]'.format(X.shape[0] - 1) + 'all features must be in [0, {}]'.format(X.shape[1] - 1) ) features_indices = np.asarray( From 3cf6d7572865a65210e862f3af79ea9389c55067 Mon Sep 17 00:00:00 2001 From: Kevin Winata <32704121+kwinata@users.noreply.github.com> Date: Fri, 13 Sep 2019 17:16:20 +0800 Subject: [PATCH 57/86] DOC Add missing attributes to SVC and NuSVC (#14930) --- sklearn/svm/classes.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index dbf5b78a1a6d1..0c98d9ffb5d3e 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -584,15 +584,22 @@ class SVC(BaseSVC): probA_ : array, shape = [n_class * (n_class-1) / 2] probB_ : array, shape = [n_class * (n_class-1) / 2] - If probability=True, the parameters learned in Platt scaling to - produce probability estimates from decision values. If - probability=False, an empty array. Platt scaling uses the logistic - function + If `probability=True`, it corresponds to the parameters learned in + Platt scaling to produce probability estimates from decision values. + If `probability=False`, it's an empty array. Platt scaling uses the + logistic function ``1 / (1 + exp(decision_value * probA_ + probB_))`` where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For more information on the multiclass case and training procedure see section 8 of [1]_. + class_weight_ : ndarray of shape (n_class,) + Multipliers of parameter C for each class. + Computed based on the ``class_weight`` parameter. + + shape_fit_ : tuple of int of shape (n_dimensions_of_X,) + Array dimensions of training vector ``X``. + Examples -------- >>> import numpy as np @@ -778,6 +785,27 @@ class NuSVC(BaseSVC): classes_ : array of shape = (n_classes,) The unique classes labels. + fit_status_ : int + 0 if correctly fitted, 1 if the algorithm did not converge. 
+ + probA_ : ndarray, shape of (n_class * (n_class-1) / 2,) + probB_ : ndarray of shape (n_class * (n_class-1) / 2,) + If `probability=True`, it corresponds to the parameters learned in + Platt scaling to produce probability estimates from decision values. + If `probability=False`, it's an empty array. Platt scaling uses the + logistic function + ``1 / (1 + exp(decision_value * probA_ + probB_))`` + where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For + more information on the multiclass case and training procedure see + section 8 of [1]_. + + class_weight_ : ndarray of shape (n_class,) + Multipliers of parameter C of each class. + Computed based on the ``class_weight`` parameter. + + shape_fit_ : tuple of int of shape (n_dimensions_of_X,) + Array dimensions of training vector ``X``. + Examples -------- >>> import numpy as np From 34c82504515f0932a3e7d73be1c2c2cb5bbdf3ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Fri, 13 Sep 2019 11:53:57 +0200 Subject: [PATCH 58/86] DOC Remove GraphViz mention in plot_tree docstring (#14973) --- sklearn/tree/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index bb368a6e81f76..e75522b671750 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -96,7 +96,7 @@ def plot_tree(decision_tree, max_depth=None, feature_names=None, Parameters ---------- decision_tree : decision tree regressor or classifier - The decision tree to be exported to GraphViz. + The decision tree to be plotted. max_depth : int, optional (default=None) The maximum depth of the representation. If None, the tree is fully From 27de857e1aedda5046b02397a782bba9bd66a88e Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 13 Sep 2019 13:22:33 +0200 Subject: [PATCH 59/86] MAINT filter deprecation warnings triggered by all_estimators (#14691) --- sklearn/utils/testing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py index b2cc4bc21dcc3..4645e455d04c6 100644 --- a/sklearn/utils/testing.py +++ b/sklearn/utils/testing.py @@ -514,7 +514,9 @@ def is_abstract(c): if IS_PYPY and ('_svmlight_format' in modname or 'feature_extraction._hashing' in modname): continue - module = __import__(modname, fromlist="dummy") + # Ignore deprecation warnings triggered at import time. + with ignore_warnings(category=DeprecationWarning): + module = __import__(modname, fromlist="dummy") classes = inspect.getmembers(module, inspect.isclass) all_classes.extend(classes) From 0f9d4819e304fcac3ff4f444f1857dd884991efd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 13 Sep 2019 09:48:13 -0400 Subject: [PATCH 60/86] MNT Deprecate enforce_estimator_tags_y (#14945) * deprecate choose_check_classifiers_labels * made new test file * deprecated enforce_estimator_tags_y * pep8 * Added note in whatsnew * not sure what went wrong in the merge --- doc/whats_new/v0.22.rst | 4 ++ sklearn/utils/estimator_checks.py | 67 +++++++++++--------- sklearn/utils/tests/test_deprecated_utils.py | 8 +++ 3 files changed, 49 insertions(+), 30 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 09eec39bbb9d5..015b0424e386e 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -488,6 +488,10 @@ Changelog :func:`~utils.estimator_checks.parametrize_with_checks`, to parametrize estimator checks for a list of estimators. :pr:`14381` by `Thomas Fan`_. 
+- |API| The following utils have been deprecated and are now private: + - ``choose_check_classifiers_labels`` + - ``enforce_estimator_tags_y`` + - |Enhancement| :func:`utils.safe_indexing` accepts an ``axis`` parameter to index array-like across rows and columns. The column indexing can be done on NumPy array, SciPy sparse matrix, and Pandas DataFrame. An additional diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index b304280d10a3f..438892db23865 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -149,7 +149,7 @@ def check_supervised_y_no_nan(name, estimator_orig): rng = np.random.RandomState(888) X = rng.randn(10, 5) y = np.full(10, np.inf) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) errmsg = "Input contains NaN, infinity or a value too large for " \ "dtype('float64')." @@ -626,7 +626,7 @@ def check_estimator_sparse_data(name, estimator_orig): # catch deprecation warnings with ignore_warnings(category=DeprecationWarning): estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) for matrix_format, X in _generate_sparse_matrix(X_csr): # catch deprecation warnings with ignore_warnings(category=(DeprecationWarning, FutureWarning)): @@ -709,7 +709,7 @@ def check_sample_weights_list(name, estimator_orig): y = np.arange(10) % 2 else: y = np.arange(10) % 3 - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) sample_weight = [3] * 10 # Test that estimators don't raise any exception estimator.fit(X, y, sample_weight=sample_weight) @@ -735,7 +735,7 @@ def check_sample_weights_invariance(name, estimator_orig): [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.dtype('float')) y = np.array([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype('int')) - y = enforce_estimator_tags_y(estimator1, y) + y = _enforce_estimator_tags_y(estimator1, y) estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y))) estimator2.fit(X, y=y, sample_weight=None) @@ -765,7 +765,7 @@ def check_dtype_object(name, estimator_orig): else: y = (X[:, 0] * 4).astype(np.int) estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) estimator.fit(X, y) if hasattr(estimator, "predict"): @@ -820,7 +820,7 @@ def check_dict_unchanged(name, estimator_orig): y = X[:, 0].astype(np.int) estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if hasattr(estimator, "n_components"): estimator.n_components = 1 @@ -859,7 +859,7 @@ def check_dont_overwrite_parameters(name, estimator_orig): y = X[:, 0].astype(np.int) if _safe_tags(estimator, 'binary_only'): y[y == 2] = 1 - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if hasattr(estimator, "n_components"): estimator.n_components = 1 @@ -912,7 +912,7 @@ def check_fit2d_predict1d(name, estimator_orig): if tags['binary_only']: y[y == 2] = 1 estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if hasattr(estimator, "n_components"): estimator.n_components = 1 @@ -960,7 +960,7 @@ def check_methods_subset_invariance(name, estimator_orig): if _safe_tags(estimator_orig, 'binary_only'): y[y == 2] = 1 estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if 
hasattr(estimator, "n_components"): estimator.n_components = 1 @@ -1001,7 +1001,7 @@ def check_fit2d_1sample(name, estimator_orig): y = X[:, 0].astype(np.int) estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if hasattr(estimator, "n_components"): estimator.n_components = 1 @@ -1033,7 +1033,7 @@ def check_fit2d_1feature(name, estimator_orig): X = pairwise_estimator_convert_X(X, estimator_orig) y = X[:, 0].astype(np.int) estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if hasattr(estimator, "n_components"): estimator.n_components = 1 @@ -1046,7 +1046,7 @@ def check_fit2d_1feature(name, estimator_orig): if name == 'RANSACRegressor': estimator.residual_threshold = 0.5 - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) set_random_state(estimator, 1) msgs = ["1 feature(s)", "n_features = 1", "n_features=1"] @@ -1069,7 +1069,7 @@ def check_fit1d(name, estimator_orig): if tags["no_validation"]: # FIXME this is a bit loose return - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if hasattr(estimator, "n_components"): estimator.n_components = 1 @@ -1210,7 +1210,7 @@ def check_pipeline_consistency(name, estimator_orig): X -= X.min() X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel) estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) set_random_state(estimator) pipeline = make_pipeline(estimator) estimator.fit(X, y) @@ -1239,7 +1239,7 @@ def check_fit_score_takes_y(name, estimator_orig): else: y = np.arange(10) % 3 estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) set_random_state(estimator) funcs = ["fit", "score", "partial_fit", "fit_predict", "fit_transform"] @@ -1269,7 +1269,7 @@ def check_estimators_dtypes(name, estimator_orig): y = X_train_int_64[:, 0] if _safe_tags(estimator_orig, 'binary_only'): y[y == 2] = 1 - y = enforce_estimator_tags_y(estimator_orig, y) + y = _enforce_estimator_tags_y(estimator_orig, y) methods = ["predict", "transform", "decision_function", "predict_proba"] @@ -1300,7 +1300,7 @@ def check_estimators_empty_data_messages(name, estimator_orig): X_zero_features = np.empty(0).reshape(3, 0) # the following y should be accepted by both classifiers and regressors # and ignored by unsupervised models - y = enforce_estimator_tags_y(e, np.array([1, 0, 1])) + y = _enforce_estimator_tags_y(e, np.array([1, 0, 1])) msg = (r"0 feature\(s\) \(shape=\(3, 0\)\) while a minimum of \d* " "is required.") assert_raises_regex(ValueError, msg, e.fit, X_zero_features, y) @@ -1318,7 +1318,7 @@ def check_estimators_nan_inf(name, estimator_orig): X_train_inf[0, 0] = np.inf y = np.ones(10) y[:5] = 0 - y = enforce_estimator_tags_y(estimator_orig, y) + y = _enforce_estimator_tags_y(estimator_orig, y) error_string_fit = "Estimator doesn't check for NaN and inf in fit." 
error_string_predict = ("Estimator doesn't check for NaN and inf in" " predict.") @@ -1413,7 +1413,7 @@ def check_estimators_pickle(name, estimator_orig): estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) set_random_state(estimator) estimator.fit(X, y) @@ -1600,7 +1600,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False): n_samples, n_features = X.shape classifier = clone(classifier_orig) X = pairwise_estimator_convert_X(X, classifier) - y = enforce_estimator_tags_y(classifier, y) + y = _enforce_estimator_tags_y(classifier, y) set_random_state(classifier) # raises error on malformed input for fit @@ -1805,7 +1805,7 @@ def check_estimators_fit_returns_self(name, estimator_orig, X = pairwise_estimator_convert_X(X, estimator_orig) estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) if readonly_memmap: X, y = create_memmap_backed_data([X, y]) @@ -1842,7 +1842,7 @@ def check_supervised_y_2d(name, estimator_orig): y = np.arange(10) % 2 else: y = np.arange(10) % 3 - y = enforce_estimator_tags_y(estimator_orig, y) + y = _enforce_estimator_tags_y(estimator_orig, y) estimator = clone(estimator_orig) set_random_state(estimator) # fit @@ -1965,7 +1965,7 @@ def check_regressors_int(name, regressor_orig): X = pairwise_estimator_convert_X(X[:50], regressor_orig) rnd = np.random.RandomState(0) y = rnd.randint(3, size=X.shape[0]) - y = enforce_estimator_tags_y(regressor_orig, y) + y = _enforce_estimator_tags_y(regressor_orig, y) rnd = np.random.RandomState(0) # separate estimators to control random seeds regressor_1 = clone(regressor_orig) @@ -1994,7 +1994,7 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False): y = StandardScaler().fit_transform(y.reshape(-1, 1)) # X is already scaled y = y.ravel() regressor = clone(regressor_orig) - y = enforce_estimator_tags_y(regressor, y) + y = _enforce_estimator_tags_y(regressor, y) if name in CROSS_DECOMPOSITION: rnd = np.random.RandomState(0) y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) @@ -2040,7 +2040,7 @@ def check_regressors_no_decision_function(name, regressor_orig): X = rng.normal(size=(10, 4)) X = pairwise_estimator_convert_X(X, regressor_orig) - y = enforce_estimator_tags_y(regressor, X[:, 0]) + y = _enforce_estimator_tags_y(regressor, X[:, 0]) if hasattr(regressor, "n_components"): # FIXME CCA, PLS is not robust to rank 1 effects @@ -2180,7 +2180,7 @@ def check_estimators_overwrite_params(name, estimator_orig): X -= X.min() X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel) estimator = clone(estimator_orig) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) set_random_state(estimator) @@ -2270,7 +2270,7 @@ def check_classifier_data_not_an_array(name, estimator_orig): X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1]]) X = pairwise_estimator_convert_X(X, estimator_orig) y = [1, 1, 1, 2, 2, 2] - y = enforce_estimator_tags_y(estimator_orig, y) + y = _enforce_estimator_tags_y(estimator_orig, y) check_estimators_data_not_an_array(name, estimator_orig, X, y) @@ -2278,7 +2278,7 @@ def check_classifier_data_not_an_array(name, estimator_orig): def check_regressor_data_not_an_array(name, estimator_orig): X, y = _boston_subset(n_samples=50) X = pairwise_estimator_convert_X(X, estimator_orig) - y = enforce_estimator_tags_y(estimator_orig, y) + y = _enforce_estimator_tags_y(estimator_orig, y) 
check_estimators_data_not_an_array(name, estimator_orig, X, y) @@ -2372,7 +2372,14 @@ def param_filter(p): assert param_value == init_param.default, init_param.name +# TODO: remove in 0.24 +@deprecated("enforce_estimator_tags_y is deprecated in version " + "0.22 and will be removed in version 0.24.") def enforce_estimator_tags_y(estimator, y): + return _enforce_estimator_tags_y(estimator, y) + + +def _enforce_estimator_tags_y(estimator, y): # Estimators with a `requires_positive_y` tag only accept strictly positive # data if _safe_tags(estimator, "requires_positive_y"): @@ -2411,7 +2418,7 @@ def check_non_transformer_estimators_n_iter(name, estimator_orig): if hasattr(estimator, 'max_iter'): iris = load_iris() X, y_ = iris.data, iris.target - y_ = enforce_estimator_tags_y(estimator, y_) + y_ = _enforce_estimator_tags_y(estimator, y_) set_random_state(estimator, 0) @@ -2627,7 +2634,7 @@ def check_fit_idempotent(name, estimator_orig): y = rng.normal(size=n_samples) else: y = rng.randint(low=0, high=2, size=n_samples) - y = enforce_estimator_tags_y(estimator, y) + y = _enforce_estimator_tags_y(estimator, y) train, test = next(ShuffleSplit(test_size=.2, random_state=rng).split(X)) X_train, y_train = _safe_split(estimator, X, y, train) diff --git a/sklearn/utils/tests/test_deprecated_utils.py b/sklearn/utils/tests/test_deprecated_utils.py index c25c1d747ab6b..83dc599e0e7be 100644 --- a/sklearn/utils/tests/test_deprecated_utils.py +++ b/sklearn/utils/tests/test_deprecated_utils.py @@ -1,6 +1,9 @@ import pytest +import numpy as np +from sklearn.dummy import DummyClassifier from sklearn.utils.estimator_checks import choose_check_classifiers_labels +from sklearn.utils.estimator_checks import enforce_estimator_tags_y # This file tests the utils that are deprecated @@ -9,3 +12,8 @@ def test_choose_check_classifiers_labels_deprecated(): with pytest.warns(DeprecationWarning, match="removed in version 0.24"): choose_check_classifiers_labels(None, None, None) + + +def test_enforce_estimator_tags_y(): + with pytest.warns(DeprecationWarning, match="removed in version 0.24"): + enforce_estimator_tags_y(DummyClassifier(), np.array([0, 1])) From 9e0b7d2da4c085c324055ac0277c821f2d790d78 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Sep 2019 13:20:29 -0400 Subject: [PATCH 61/86] DOC Adds more docstring standards (#14744) --- doc/developers/contributing.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index a400fd1f7f6d4..4b24c7089a5a8 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -629,6 +629,12 @@ Finally, follow the formatting rules below to make it consistently good: of the mentioned shapes. The default value is `np.ones(shape=(n_samples,))`. + list_param : list of int + + typed_ndarray : ndarray of shape (n_samples,), dtype=np.int32 + + sample_weight : array-like of shape (n_samples,), default=None + In general have the following in mind: 1. Use Python basic types. (``bool`` instead of ``boolean``) @@ -639,6 +645,14 @@ In general have the following in mind: 4. 1D or 2D data can be a subset of ``{array-like, ndarray, sparse matrix, dataframe}``. Note that ``array-like`` can also be a ``list``, while ``ndarray`` is explicitly only a ``numpy.ndarray``. + 5. When specifying the data type of a list, use ``of`` as a delimiter: + ``list of int``. + 6. When specifying the dtype of an ndarray, use e.g. 
``dtype=np.int32`` + after defining the shape: + ``ndarray of shape (n_samples,), dtype=np.int32``. + 7. When the default is ``None``, ``None`` only needs to be specified at the + end with ``default=None``. Be sure to include in the docstring, what it + means for the parameter or attribute to be ``None``. * For unwritten formatting rules, try to follow existing good works: From 2db5c0d385a3d820c96356258148867fa0dfd6ae Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Fri, 13 Sep 2019 20:29:20 +0200 Subject: [PATCH 62/86] DOC Add example for GroupShuffleSplit (#14906) --- sklearn/model_selection/_split.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index ab681e89c1916..c49a3ce6aea4e 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -1481,6 +1481,22 @@ class GroupShuffleSplit(ShuffleSplit): If None, the random number generator is the RandomState instance used by `np.random`. + Examples + -------- + >>> import numpy as np + >>> from sklearn.model_selection import GroupShuffleSplit + >>> X = np.ones(shape=(8, 2)) + >>> y = np.ones(shape=(8, 1)) + >>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3]) + >>> print(groups.shape) + (8,) + >>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42) + >>> gss.get_n_splits() + 2 + >>> for train_idx, test_idx in gss.split(X, y, groups): + ... print("TRAIN:", train_idx, "TEST:", test_idx) + TRAIN: [2 3 4 5 6 7] TEST: [0 1] + TRAIN: [0 1 5 6 7] TEST: [2 3 4] ''' def __init__(self, n_splits=5, test_size=None, train_size=None, From 334fe5a2faeafcaee29eaa7df866164542ebfee4 Mon Sep 17 00:00:00 2001 From: catajara <48599015+catajara@users.noreply.github.com> Date: Fri, 13 Sep 2019 15:19:56 -0400 Subject: [PATCH 63/86] DOC add missing attributes to OneVsRestClassifier (#14783) --- sklearn/multiclass.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 8b28507906e2b..9cee9661489b6 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -170,11 +170,17 @@ class OneVsRestClassifier(MultiOutputMixin, ClassifierMixin, classes_ : array, shape = [`n_classes`] Class labels. + + n_classes_ : int + Number of classes. + label_binarizer_ : LabelBinarizer object Object used to transform multiclass labels to binary labels and vice-versa. + multilabel_ : boolean Whether a OneVsRestClassifier is a multilabel classifier. 
+ """ def __init__(self, estimator, n_jobs=None): self.estimator = estimator From 6f4509a2743d5c6b67f00445272263a96f4a5214 Mon Sep 17 00:00:00 2001 From: Ming Li <14131823+minggli@users.noreply.github.com> Date: Sat, 14 Sep 2019 21:00:58 +0100 Subject: [PATCH 64/86] TST Adjusts rtol for test_lda_predict (#14978) --- sklearn/tests/test_discriminant_analysis.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 6a32c4ec15058..002d69357e1c5 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -2,7 +2,6 @@ import pytest -from numpy.testing import assert_allclose from scipy import linalg from sklearn.exceptions import ChangedBehaviorWarning @@ -76,8 +75,8 @@ def test_lda_predict(): assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y, 'solver %s' % solver) y_log_proba_pred1 = clf.predict_log_proba(X1) - assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1, - 8, 'solver %s' % solver) + assert_allclose(np.exp(y_log_proba_pred1), y_proba_pred1, + rtol=1e-6, err_msg='solver %s' % solver) # Primarily test for commit 2f34950 -- "reuse" of priors y_pred3 = clf.fit(X, y3).predict(X) From c20e3120da809901efb2bd0d851e8cbae049313f Mon Sep 17 00:00:00 2001 From: Andrea Navarrete Date: Sat, 14 Sep 2019 17:40:21 -0400 Subject: [PATCH 65/86] DOC Change default dataset for `plot_johnson_lindenstrauss_bound.py` (#14787) --- examples/plot_johnson_lindenstrauss_bound.py | 28 +++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/examples/plot_johnson_lindenstrauss_bound.py b/examples/plot_johnson_lindenstrauss_bound.py index 988de0fe2735b..b981c14fbf132 100644 --- a/examples/plot_johnson_lindenstrauss_bound.py +++ b/examples/plot_johnson_lindenstrauss_bound.py @@ -102,27 +102,26 @@ # Empirical validation # ==================== # -# We validate the above bounds on the digits dataset or on the 20 newsgroups -# text document (TF-IDF word frequencies) dataset: -# -# - for the digits dataset, some 8x8 gray level pixels data for 500 -# handwritten digits pictures are randomly projected to spaces for various -# larger number of dimensions ``n_components``. +# We validate the above bounds on the 20 newsgroups text document +# (TF-IDF word frequencies) dataset or on the digits dataset: # # - for the 20 newsgroups dataset some 500 documents with 100k # features in total are projected using a sparse random matrix to smaller # euclidean spaces with various values for the target number of dimensions # ``n_components``. # -# The default dataset is the digits dataset. To run the example on the twenty -# newsgroups dataset, pass the --twenty-newsgroups command line argument to +# - for the digits dataset, some 8x8 gray level pixels data for 500 +# handwritten digits pictures are randomly projected to spaces for various +# larger number of dimensions ``n_components``. +# +# The default dataset is the 20 newsgroups dataset. To run the example on the +# digits dataset, pass the ``--use-digits-dataset`` command line argument to # this script. 
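(For orientation, the kind of check this example performs can be sketched in a few lines of plain scikit-learn. The sketch below is illustrative only: it uses synthetic random data and an arbitrary ``eps`` instead of the datasets and settings used by the example itself.)

import numpy as np
from sklearn.random_projection import (SparseRandomProjection,
                                       johnson_lindenstrauss_min_dim)
from sklearn.metrics.pairwise import euclidean_distances

rng = np.random.RandomState(0)
data = rng.rand(500, 10000)            # 500 samples in a 10000-dimensional space
n_components = johnson_lindenstrauss_min_dim(n_samples=500, eps=0.5)
projected = SparseRandomProjection(n_components=n_components,
                                   random_state=0).fit_transform(data)

dists = euclidean_distances(data, squared=True).ravel()
nonzero = dists != 0
rates = (euclidean_distances(projected, squared=True).ravel()[nonzero]
         / dists[nonzero])
# with high probability these ratios stay within [1 - eps, 1 + eps]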
-if '--twenty-newsgroups' in sys.argv: - # Need an internet connection hence not enabled by default - data = fetch_20newsgroups_vectorized().data[:500] -else: +if '--use-digits-dataset' in sys.argv: data = load_digits().data[:500] +else: + data = fetch_20newsgroups_vectorized().data[:500] ########################################################## # For each value of ``n_components``, we plot: @@ -158,7 +157,10 @@ projected_data, squared=True).ravel()[nonzero] plt.figure() - plt.hexbin(dists, projected_dists, gridsize=100, cmap=plt.cm.PuBu) + min_dist = min(projected_dists.min(), dists.min()) + max_dist = max(projected_dists.max(), dists.max()) + plt.hexbin(dists, projected_dists, gridsize=100, cmap=plt.cm.PuBu, + extent=[min_dist, max_dist, min_dist, max_dist]) plt.xlabel("Pairwise squared distances in original space") plt.ylabel("Pairwise squared distances in projected space") plt.title("Pairwise distances distribution for n_components=%d" % From 9b65ed75991d739595eb1286f43bb838a15a3199 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 15 Sep 2019 18:09:09 -0400 Subject: [PATCH 66/86] MNT deprecate outputs_2d_ attribute of dummy estimators (#14933) --- doc/whats_new/v0.22.rst | 6 +++++- sklearn/dummy.py | 39 +++++++++++++++++++++++++------------ sklearn/tests/test_dummy.py | 9 +++++++++ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 015b0424e386e..5606e36e6de81 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -139,12 +139,16 @@ Changelog `Adrin Jalali`_. :mod:`sklearn.dummy` -............................ +.................... - |Fix| :class:`dummy.DummyClassifier` now handles checking the existence of the provided constant in multiouput cases. :pr:`14908` by :user:`Martina G. Vilas `. +- |API| The ``outputs_2d_`` attribute is deprecated in + :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`. It is + equivalent to ``n_outputs > 1``. :pr:`14933` by `Nicolas Hug`_ + :mod:`sklearn.ensemble` ....................... 
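(A minimal sketch of the behaviour described by this changelog entry, using the deprecated property added in the ``sklearn/dummy.py`` hunk below; the toy data is arbitrary, and the fitted attribute written out in the comparison is ``n_outputs_``.)

from sklearn.dummy import DummyClassifier

clf = DummyClassifier(strategy='most_frequent').fit([[1, 2], [3, 4]], [0, 1])
# accessing the attribute still works but now emits a DeprecationWarning;
# the documented replacement is the explicit comparison on the right
assert clf.outputs_2d_ == (clf.n_outputs_ > 1)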
diff --git a/sklearn/dummy.py b/sklearn/dummy.py index f95fcc3dcf618..233dc27aec076 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -17,6 +17,7 @@ from .utils.random import random_choice_csc from .utils.stats import _weighted_percentile from .utils.multiclass import class_distribution +from .utils import deprecated class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator): @@ -120,8 +121,6 @@ def fit(self, X, y, sample_weight=None): if not self.sparse_output_: y = np.atleast_1d(y) - self.output_2d_ = y.ndim == 2 and y.shape[1] > 1 - if y.ndim == 1: y = np.reshape(y, (-1, 1)) @@ -154,7 +153,7 @@ def fit(self, X, y, sample_weight=None): .format(self.constant, list(self.classes_[k]))) raise ValueError(err_msg) - if self.n_outputs_ == 1 and not self.output_2d_: + if self.n_outputs_ == 1: self.n_classes_ = self.n_classes_[0] self.classes_ = self.classes_[0] self.class_prior_ = self.class_prior_[0] @@ -185,7 +184,7 @@ def predict(self, X): classes_ = self.classes_ class_prior_ = self.class_prior_ constant = self.constant - if self.n_outputs_ == 1 and not self.output_2d_: + if self.n_outputs_ == 1: # Get same type even for self.n_outputs_ == 1 n_classes_ = [n_classes_] classes_ = [classes_] @@ -194,7 +193,7 @@ def predict(self, X): # Compute probability only once if self.strategy == "stratified": proba = self.predict_proba(X) - if self.n_outputs_ == 1 and not self.output_2d_: + if self.n_outputs_ == 1: proba = [proba] if self.sparse_output_: @@ -231,7 +230,7 @@ def predict(self, X): elif self.strategy == "constant": y = np.tile(self.constant, (n_samples, 1)) - if self.n_outputs_ == 1 and not self.output_2d_: + if self.n_outputs_ == 1: y = np.ravel(y) return y @@ -263,7 +262,7 @@ def predict_proba(self, X): classes_ = self.classes_ class_prior_ = self.class_prior_ constant = self.constant - if self.n_outputs_ == 1 and not self.output_2d_: + if self.n_outputs_ == 1: # Get same type even for self.n_outputs_ == 1 n_classes_ = [n_classes_] classes_ = [classes_] @@ -294,7 +293,7 @@ def predict_proba(self, X): P.append(out) - if self.n_outputs_ == 1 and not self.output_2d_: + if self.n_outputs_ == 1: P = P[0] return P @@ -355,6 +354,15 @@ def score(self, X, y, sample_weight=None): X = np.zeros(shape=(len(y), 1)) return super().score(X, y, sample_weight) + @deprecated( + "The outputs_2d_ attribute is deprecated in version 0.22 " + "and will be removed in version 0.24. It is equivalent to " + "n_outputs_ > 1." + ) + @property + def outputs_2d_(self): + return self.n_outputs_ != 1 + class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): """ @@ -429,8 +437,6 @@ def fit(self, X, y, sample_weight=None): if len(y) == 0: raise ValueError("y must not be empty.") - self.output_2d_ = y.ndim == 2 and y.shape[1] > 1 - if y.ndim == 1: y = np.reshape(y, (-1, 1)) self.n_outputs_ = y.shape[1] @@ -470,7 +476,7 @@ def fit(self, X, y, sample_weight=None): accept_sparse=['csr', 'csc', 'coo'], ensure_2d=False, ensure_min_samples=0) - if self.output_2d_ and self.constant.shape[0] != y.shape[1]: + if self.n_outputs_ != 1 and self.constant.shape[0] != y.shape[1]: raise ValueError( "Constant target value should have " "shape (%d, 1)." 
% y.shape[1]) @@ -508,7 +514,7 @@ def predict(self, X, return_std=False): dtype=np.array(self.constant_).dtype) y_std = np.zeros((n_samples, self.n_outputs_)) - if self.n_outputs_ == 1 and not self.output_2d_: + if self.n_outputs_ == 1: y = np.ravel(y) y_std = np.ravel(y_std) @@ -554,3 +560,12 @@ def score(self, X, y, sample_weight=None): if X is None: X = np.zeros(shape=(len(y), 1)) return super().score(X, y, sample_weight) + + @deprecated( + "The outputs_2d_ attribute is deprecated in version 0.22 " + "and will be removed in version 0.24. It is equivalent to " + "n_outputs_ > 1." + ) + @property + def outputs_2d_(self): + return self.n_outputs_ != 1 diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 690fad42a58b8..88b2d16fba46e 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -755,3 +755,12 @@ def test_dtype_of_classifier_probas(strategy): probas = model.fit(X, y).predict_proba(X) assert probas.dtype == np.float64 + + +@pytest.mark.parametrize("Dummy", (DummyRegressor, DummyClassifier)) +def test_outputs_2d_deprecation(Dummy): + X = [[1, 2]] + y = [0] + with pytest.warns(DeprecationWarning, + match="will be removed in version 0.24"): + Dummy().fit(X, y).outputs_2d_ From e19a9d730fea5dd017b033ce1b270346fcc5d010 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 16 Sep 2019 04:58:56 -0400 Subject: [PATCH 67/86] [MRG] Make k_means use KMeans instead (#14985) --- sklearn/cluster/k_means_.py | 277 +++++++++++++++++++----------------- 1 file changed, 146 insertions(+), 131 deletions(-) diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index a83df9c836b86..8af8cc6873011 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -288,131 +288,17 @@ def k_means(X, n_clusters, sample_weight=None, init='k-means++', Returned only if `return_n_iter` is set to True. """ - if n_init <= 0: - raise ValueError("Invalid number of initializations." - " n_init=%d must be bigger than zero." % n_init) - random_state = check_random_state(random_state) - - if max_iter <= 0: - raise ValueError('Number of iterations should be a positive number,' - ' got %d instead' % max_iter) - - # avoid forcing order when copy_x=False - order = "C" if copy_x else None - X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32], - order=order, copy=copy_x) - # verify that the number of samples given is larger than k - if _num_samples(X) < n_clusters: - raise ValueError("n_samples=%d should be >= n_clusters=%d" % ( - _num_samples(X), n_clusters)) - - tol = _tolerance(X, tol) - - # If the distances are precomputed every job will create a matrix of shape - # (n_clusters, n_samples). To stop KMeans from eating up memory we only - # activate this if the created matrix is guaranteed to be under 100MB. 12 - # million entries consume a little under 100MB if they are of type double. 
- if precompute_distances == 'auto': - n_samples = X.shape[0] - precompute_distances = (n_clusters * n_samples) < 12e6 - elif isinstance(precompute_distances, bool): - pass - else: - raise ValueError("precompute_distances should be 'auto' or True/False" - ", but a value of %r was passed" % - precompute_distances) - - # Validate init array - if hasattr(init, '__array__'): - init = check_array(init, dtype=X.dtype.type, copy=True) - _validate_center_shape(X, n_clusters, init) - - if n_init != 1: - warnings.warn( - 'Explicit initial center position passed: ' - 'performing only one init in k-means instead of n_init=%d' - % n_init, RuntimeWarning, stacklevel=2) - n_init = 1 - - # subtract of mean of x for more accurate distance computations - if not sp.issparse(X): - X_mean = X.mean(axis=0) - # The copy was already done above - X -= X_mean - - if hasattr(init, '__array__'): - init -= X_mean - - # precompute squared norms of data points - x_squared_norms = row_norms(X, squared=True) - - best_labels, best_inertia, best_centers = None, None, None - if n_clusters == 1: - # elkan doesn't make sense for a single cluster, full will produce - # the right result. - algorithm = "full" - if algorithm == "auto": - algorithm = "full" if sp.issparse(X) else 'elkan' - if algorithm == "full": - kmeans_single = _kmeans_single_lloyd - elif algorithm == "elkan": - kmeans_single = _kmeans_single_elkan - else: - raise ValueError("Algorithm must be 'auto', 'full' or 'elkan', got" - " %s" % str(algorithm)) - - seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init) - if effective_n_jobs(n_jobs) == 1: - # For a single thread, less memory is needed if we just store one set - # of the best results (as opposed to one set per run per thread). - for seed in seeds: - # run a k-means once - labels, inertia, centers, n_iter_ = kmeans_single( - X, sample_weight, n_clusters, max_iter=max_iter, init=init, - verbose=verbose, precompute_distances=precompute_distances, - tol=tol, x_squared_norms=x_squared_norms, - random_state=seed) - # determine if these results are the best so far - if best_inertia is None or inertia < best_inertia: - best_labels = labels.copy() - best_centers = centers.copy() - best_inertia = inertia - best_n_iter = n_iter_ - else: - # parallelisation of k-means runs - results = Parallel(n_jobs=n_jobs, verbose=0)( - delayed(kmeans_single)(X, sample_weight, n_clusters, - max_iter=max_iter, init=init, - verbose=verbose, tol=tol, - precompute_distances=precompute_distances, - x_squared_norms=x_squared_norms, - # Change seed to ensure variety - random_state=seed) - for seed in seeds) - # Get results with the lowest inertia - labels, inertia, centers, n_iters = zip(*results) - best = np.argmin(inertia) - best_labels = labels[best] - best_inertia = inertia[best] - best_centers = centers[best] - best_n_iter = n_iters[best] - - if not sp.issparse(X): - if not copy_x: - X += X_mean - best_centers += X_mean - - distinct_clusters = len(set(best_labels)) - if distinct_clusters < n_clusters: - warnings.warn("Number of distinct clusters ({}) found smaller than " - "n_clusters ({}). 
Possibly due to duplicate points " - "in X.".format(distinct_clusters, n_clusters), - ConvergenceWarning, stacklevel=2) + est = KMeans( + n_clusters=n_clusters, init=init, n_init=n_init, max_iter=max_iter, + verbose=verbose, precompute_distances=precompute_distances, tol=tol, + random_state=random_state, copy_x=copy_x, n_jobs=n_jobs, + algorithm=algorithm + ).fit(X, sample_weight=sample_weight) if return_n_iter: - return best_centers, best_labels, best_inertia, best_n_iter + return est.cluster_centers_, est.labels_, est.inertia_, est.n_iter_ else: - return best_centers, best_labels, best_inertia + return est.cluster_centers_, est.labels_, est.inertia_ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300, @@ -953,15 +839,144 @@ def fit(self, X, y=None, sample_weight=None): """ random_state = check_random_state(self.random_state) - self.cluster_centers_, self.labels_, self.inertia_, self.n_iter_ = \ - k_means( - X, n_clusters=self.n_clusters, sample_weight=sample_weight, - init=self.init, n_init=self.n_init, - max_iter=self.max_iter, verbose=self.verbose, - precompute_distances=self.precompute_distances, - tol=self.tol, random_state=random_state, copy_x=self.copy_x, - n_jobs=self.n_jobs, algorithm=self.algorithm, - return_n_iter=True) + n_init = self.n_init + if n_init <= 0: + raise ValueError("Invalid number of initializations." + " n_init=%d must be bigger than zero." % n_init) + + if self.max_iter <= 0: + raise ValueError( + 'Number of iterations should be a positive number,' + ' got %d instead' % self.max_iter + ) + + # avoid forcing order when copy_x=False + order = "C" if self.copy_x else None + X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32], + order=order, copy=self.copy_x) + # verify that the number of samples given is larger than k + if _num_samples(X) < self.n_clusters: + raise ValueError("n_samples=%d should be >= n_clusters=%d" % ( + _num_samples(X), self.n_clusters)) + + tol = _tolerance(X, self.tol) + + # If the distances are precomputed every job will create a matrix of + # shape (n_clusters, n_samples). To stop KMeans from eating up memory + # we only activate this if the created matrix is guaranteed to be + # under 100MB. 12 million entries consume a little under 100MB if they + # are of type double. 
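(A quick arithmetic check of the 100MB figure quoted in the comment above, purely for illustration.)

n_entries = 12e6            # threshold on n_clusters * n_samples
bytes_per_double = 8        # np.float64
print(n_entries * bytes_per_double / 1e6)   # 96.0 MB, just under 100MB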
+ precompute_distances = self.precompute_distances + if precompute_distances == 'auto': + n_samples = X.shape[0] + precompute_distances = (self.n_clusters * n_samples) < 12e6 + elif isinstance(precompute_distances, bool): + pass + else: + raise ValueError( + "precompute_distances should be 'auto' or True/False" + ", but a value of %r was passed" % + precompute_distances + ) + + # Validate init array + init = self.init + if hasattr(init, '__array__'): + init = check_array(init, dtype=X.dtype.type, copy=True) + _validate_center_shape(X, self.n_clusters, init) + + if n_init != 1: + warnings.warn( + 'Explicit initial center position passed: ' + 'performing only one init in k-means instead of n_init=%d' + % n_init, RuntimeWarning, stacklevel=2) + n_init = 1 + + # subtract of mean of x for more accurate distance computations + if not sp.issparse(X): + X_mean = X.mean(axis=0) + # The copy was already done above + X -= X_mean + + if hasattr(init, '__array__'): + init -= X_mean + + # precompute squared norms of data points + x_squared_norms = row_norms(X, squared=True) + + best_labels, best_inertia, best_centers = None, None, None + algorithm = self.algorithm + if self.n_clusters == 1: + # elkan doesn't make sense for a single cluster, full will produce + # the right result. + algorithm = "full" + if algorithm == "auto": + algorithm = "full" if sp.issparse(X) else 'elkan' + if algorithm == "full": + kmeans_single = _kmeans_single_lloyd + elif algorithm == "elkan": + kmeans_single = _kmeans_single_elkan + else: + raise ValueError("Algorithm must be 'auto', 'full' or 'elkan', got" + " %s" % str(algorithm)) + + seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init) + if effective_n_jobs(self.n_jobs) == 1: + # For a single thread, less memory is needed if we just store one + # set of the best results (as opposed to one set per run per + # thread). + for seed in seeds: + # run a k-means once + labels, inertia, centers, n_iter_ = kmeans_single( + X, sample_weight, self.n_clusters, + max_iter=self.max_iter, init=init, verbose=self.verbose, + precompute_distances=precompute_distances, tol=tol, + x_squared_norms=x_squared_norms, random_state=seed) + # determine if these results are the best so far + if best_inertia is None or inertia < best_inertia: + best_labels = labels.copy() + best_centers = centers.copy() + best_inertia = inertia + best_n_iter = n_iter_ + else: + # parallelisation of k-means runs + results = Parallel(n_jobs=self.n_jobs, verbose=0)( + delayed(kmeans_single)( + X, sample_weight, self.n_clusters, + max_iter=self.max_iter, init=init, + verbose=self.verbose, tol=tol, + precompute_distances=precompute_distances, + x_squared_norms=x_squared_norms, + # Change seed to ensure variety + random_state=seed + ) + for seed in seeds) + # Get results with the lowest inertia + labels, inertia, centers, n_iters = zip(*results) + best = np.argmin(inertia) + best_labels = labels[best] + best_inertia = inertia[best] + best_centers = centers[best] + best_n_iter = n_iters[best] + + if not sp.issparse(X): + if not self.copy_x: + X += X_mean + best_centers += X_mean + + distinct_clusters = len(set(best_labels)) + if distinct_clusters < self.n_clusters: + warnings.warn( + "Number of distinct clusters ({}) found smaller than " + "n_clusters ({}). 
Possibly due to duplicate points " + "in X.".format(distinct_clusters, self.n_clusters), + ConvergenceWarning, stacklevel=2 + ) + + self.cluster_centers_ = best_centers + self.labels_ = best_labels + self.inertia_ = best_inertia + self.n_iter_ = best_n_iter return self def fit_predict(self, X, y=None, sample_weight=None): From c289e2716bcaffc04d88f88228426734f386d418 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 16 Sep 2019 13:39:53 +0200 Subject: [PATCH 68/86] EHN update lobpcg from scipy master (#14971) --- sklearn/externals/_lobpcg.py | 422 ++++++++++++++++++++--------------- sklearn/utils/fixes.py | 6 +- 2 files changed, 247 insertions(+), 181 deletions(-) diff --git a/sklearn/externals/_lobpcg.py b/sklearn/externals/_lobpcg.py index 30492c97c182b..4e0d0ad19b753 100644 --- a/sklearn/externals/_lobpcg.py +++ b/sklearn/externals/_lobpcg.py @@ -21,9 +21,8 @@ """ from __future__ import division, print_function, absolute_import -import warnings import numpy as np -from scipy.linalg import (inv, eigh, cho_factor, cho_solve, cholesky, +from scipy.linalg import (inv, eigh, cho_factor, cho_solve, cholesky, orth, LinAlgError) from scipy.sparse.linalg import aslinearoperator @@ -31,6 +30,7 @@ def bmat(*args, **kwargs): + import warnings with warnings.catch_warnings(record=True): warnings.filterwarnings( 'ignore', '.*the matrix subclass is not the recommended way.*') @@ -42,19 +42,20 @@ def _save(ar, fileName): np.savetxt(fileName, ar) -def _report_nonhermitian(M, a, b, name): +def _report_nonhermitian(M, name): """ - Report if `M` is not a hermitian matrix given the tolerances `a`, `b`. + Report if `M` is not a hermitian matrix given its type. """ from scipy.linalg import norm md = M - M.T.conj() nmd = norm(md, 1) - tol = np.spacing(max(10**a, (10**b)*norm(M, 1))) + tol = 10 * np.finfo(M.dtype).eps + tol = max(tol, tol * norm(M, 1)) if nmd > tol: - print('matrix %s is not sufficiently Hermitian for a=%d, b=%d:' - % (name, a, b)) + print('matrix %s of the type %s is not sufficiently Hermitian:' + % (name, M.dtype)) print('condition: %.e < %e' % (nmd, tol)) @@ -88,29 +89,42 @@ def _makeOperator(operatorInput, expectedShape): def _applyConstraints(blockVectorV, factYBY, blockVectorBY, blockVectorY): """Changes blockVectorV in place.""" - gramYBV = np.dot(blockVectorBY.T.conj(), blockVectorV) - tmp = cho_solve(factYBY, gramYBV) + YBV = np.dot(blockVectorBY.T.conj(), blockVectorV) + tmp = cho_solve(factYBY, YBV) blockVectorV -= np.dot(blockVectorY, tmp) def _b_orthonormalize(B, blockVectorV, blockVectorBV=None, retInvR=False): + """B-orthonormalize the given block vector using Cholesky.""" + normalization = blockVectorV.max(axis=0)+np.finfo(blockVectorV.dtype).eps + blockVectorV = blockVectorV / normalization if blockVectorBV is None: if B is not None: blockVectorBV = B(blockVectorV) else: blockVectorBV = blockVectorV # Shared data!!! - gramVBV = np.dot(blockVectorV.T.conj(), blockVectorBV) - gramVBV = cholesky(gramVBV) - gramVBV = inv(gramVBV, overwrite_a=True) - # gramVBV is now R^{-1}. - blockVectorV = np.dot(blockVectorV, gramVBV) - if B is not None: - blockVectorBV = np.dot(blockVectorBV, gramVBV) else: + blockVectorBV = blockVectorBV / normalization + VBV = np.matmul(blockVectorV.T.conj(), blockVectorBV) + try: + # VBV is a Cholesky factor from now on... 
+ VBV = cholesky(VBV, overwrite_a=True) + VBV = inv(VBV, overwrite_a=True) + blockVectorV = np.matmul(blockVectorV, VBV) + # blockVectorV = (cho_solve((VBV.T, True), blockVectorV.T)).T + if B is not None: + blockVectorBV = np.matmul(blockVectorBV, VBV) + # blockVectorBV = (cho_solve((VBV.T, True), blockVectorBV.T)).T + else: + blockVectorBV = None + except LinAlgError: + # raise ValueError('Cholesky has failed') + blockVectorV = None blockVectorBV = None + VBV = None if retInvR: - return blockVectorV, blockVectorBV, gramVBV + return blockVectorV, blockVectorBV, VBV, normalization else: return blockVectorV, blockVectorBV @@ -141,113 +155,65 @@ def lobpcg(A, X, A : {sparse matrix, dense matrix, LinearOperator} The symmetric linear operator of the problem, usually a sparse matrix. Often called the "stiffness matrix". - X : array_like - Initial approximation to the k eigenvectors. If A has - shape=(n,n) then X should have shape shape=(n,k). + X : ndarray, float32 or float64 + Initial approximation to the ``k`` eigenvectors (non-sparse). If `A` + has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``. B : {dense matrix, sparse matrix, LinearOperator}, optional - the right hand side operator in a generalized eigenproblem. - by default, B = Identity - often called the "mass matrix" + The right hand side operator in a generalized eigenproblem. + By default, ``B = Identity``. Often called the "mass matrix". M : {dense matrix, sparse matrix, LinearOperator}, optional - preconditioner to A; by default M = Identity - M should approximate the inverse of A - Y : array_like, optional - n-by-sizeY matrix of constraints, sizeY < n + Preconditioner to `A`; by default ``M = Identity``. + `M` should approximate the inverse of `A`. + Y : ndarray, float32 or float64, optional + n-by-sizeY matrix of constraints (non-sparse), sizeY < n The iterations will be performed in the B-orthogonal complement of the column-space of Y. Y must be full rank. tol : scalar, optional - Solver tolerance (stopping criterion) - by default: tol=n*sqrt(eps) - maxiter : integer, optional - maximum number of iterations - by default: maxiter=min(n,20) + Solver tolerance (stopping criterion). + The default is ``tol=n*sqrt(eps)``. + maxiter : int, optional + Maximum number of iterations. The default is ``maxiter=min(n, 20)``. largest : bool, optional - when True, solve for the largest eigenvalues, otherwise the smallest - verbosityLevel : integer, optional - controls solver output. default: verbosityLevel = 0. - retLambdaHistory : boolean, optional - whether to return eigenvalue history - retResidualNormsHistory : boolean, optional - whether to return history of residual norms + When True, solve for the largest eigenvalues, otherwise the smallest. + verbosityLevel : int, optional + Controls solver output. The default is ``verbosityLevel=0``. + retLambdaHistory : bool, optional + Whether to return eigenvalue history. Default is False. + retResidualNormsHistory : bool, optional + Whether to return history of residual norms. Default is False. Returns ------- - w : array - Array of k eigenvalues - v : array - An array of k eigenvectors. V has the same shape as X. - lambdas : list of arrays, optional + w : ndarray + Array of ``k`` eigenvalues + v : ndarray + An array of ``k`` eigenvectors. `v` has the same shape as `X`. + lambdas : list of ndarray, optional The eigenvalue history, if `retLambdaHistory` is True. 
- rnorms : list of arrays, optional + rnorms : list of ndarray, optional The history of residual norms, if `retResidualNormsHistory` is True. - Examples - -------- - - Solve A x = lambda B x with constraints and preconditioning. - - >>> from scipy.sparse import spdiags, issparse - >>> from scipy.sparse.linalg import lobpcg, LinearOperator - >>> n = 100 - >>> vals = [np.arange(n, dtype=np.float64) + 1] - >>> A = spdiags(vals, 0, n, n) - >>> A.toarray() - array([[ 1., 0., 0., ..., 0., 0., 0.], - [ 0., 2., 0., ..., 0., 0., 0.], - [ 0., 0., 3., ..., 0., 0., 0.], - ..., - [ 0., 0., 0., ..., 98., 0., 0.], - [ 0., 0., 0., ..., 0., 99., 0.], - [ 0., 0., 0., ..., 0., 0., 100.]]) - - Constraints. - - >>> Y = np.eye(n, 3) - - Initial guess for eigenvectors, should have linearly independent - columns. Column dimension = number of requested eigenvalues. - - >>> X = np.random.rand(n, 3) - - Preconditioner -- inverse of A (as an abstract linear operator). - - >>> invA = spdiags([1./vals[0]], 0, n, n) - >>> def precond( x ): - ... return invA * x - >>> M = LinearOperator(matvec=precond, shape=(n, n), dtype=float) - - Here, ``invA`` could of course have been used directly as a preconditioner. - Let us then solve the problem: - - >>> eigs, vecs = lobpcg(A, X, Y=Y, M=M, largest=False) - >>> eigs - array([4., 5., 6.]) - - Note that the vectors passed in Y are the eigenvectors of the 3 smallest - eigenvalues. The results returned are orthogonal to those. - Notes ----- - If both retLambdaHistory and retResidualNormsHistory are True, + If both ``retLambdaHistory`` and ``retResidualNormsHistory`` are True, the return tuple has the following format - (lambda, V, lambda history, residual norms history). + ``(lambda, V, lambda history, residual norms history)``. In the following ``n`` denotes the matrix size and ``m`` the number of required eigenvalues (smallest or largest). - The LOBPCG code internally solves eigenproblems of the size 3``m`` on every + The LOBPCG code internally solves eigenproblems of the size ``3m`` on every iteration by calling the "standard" dense eigensolver, so if ``m`` is not small enough compared to ``n``, it does not make sense to call the LOBPCG - code, but rather one should use the "standard" eigensolver, - e.g. numpy or scipy function in this case. - If one calls the LOBPCG algorithm for 5``m``>``n``, - it will most likely break internally, so the code tries to call - the standard function instead. - - It is not that n should be large for the LOBPCG to work, but rather the - ratio ``n``/``m`` should be large. It you call LOBPCG with ``m``=1 - and ``n``=10, it works though ``n`` is small. The method is intended - for extremely large ``n``/``m``, see e.g., reference [28] in + code, but rather one should use the "standard" eigensolver, e.g. numpy or + scipy function in this case. + If one calls the LOBPCG algorithm for ``5m > n``, it will most likely break + internally, so the code tries to call the standard function instead. + + It is not that ``n`` should be large for the LOBPCG to work, but rather the + ratio ``n / m`` should be large. It you call LOBPCG with ``m=1`` + and ``n=10``, it works though ``n`` is small. The method is intended + for extremely large ``n / m``, see e.g., reference [28] in https://arxiv.org/abs/0705.2626 The convergence speed depends basically on two factors: @@ -260,13 +226,7 @@ def lobpcg(A, X, directory) is ill-conditioned for large ``n``, so convergence will be slow, unless efficient preconditioning is used. 
For this specific problem, a good simple preconditioner function would be a linear solve - for A, which is easy to code since A is tridiagonal. - - *Acknowledgements* - - lobpcg.py code was written by Robert Cimrman. - Many thanks belong to Andrew Knyazev, the author of the algorithm, - for lots of advice and support. + for `A`, which is easy to code since A is tridiagonal. References ---------- @@ -282,6 +242,62 @@ def lobpcg(A, X, .. [3] A. V. Knyazev's C and MATLAB implementations: https://bitbucket.org/joseroman/blopex + + Examples + -------- + + Solve ``A x = lambda x`` with constraints and preconditioning. + + >>> import numpy as np + >>> from scipy.sparse import spdiags, issparse + >>> from scipy.sparse.linalg import lobpcg, LinearOperator + >>> n = 100 + >>> vals = np.arange(1, n + 1) + >>> A = spdiags(vals, 0, n, n) + >>> A.toarray() + array([[ 1., 0., 0., ..., 0., 0., 0.], + [ 0., 2., 0., ..., 0., 0., 0.], + [ 0., 0., 3., ..., 0., 0., 0.], + ..., + [ 0., 0., 0., ..., 98., 0., 0.], + [ 0., 0., 0., ..., 0., 99., 0.], + [ 0., 0., 0., ..., 0., 0., 100.]]) + + Constraints: + + >>> Y = np.eye(n, 3) + + Initial guess for eigenvectors, should have linearly independent + columns. Column dimension = number of requested eigenvalues. + + >>> X = np.random.rand(n, 3) + + Preconditioner in the inverse of A in this example: + + >>> invA = spdiags([1./vals], 0, n, n) + + The preconditiner must be defined by a function: + + >>> def precond( x ): + ... return invA @ x + + The argument x of the preconditioner function is a matrix inside `lobpcg`, + thus the use of matrix-matrix product ``@``. + + The preconditioner function is passed to lobpcg as a `LinearOperator`: + + >>> M = LinearOperator(matvec=precond, matmat=precond, + ... shape=(n, n), dtype=float) + + Let us now solve the eigenvalue problem for the matrix A: + + >>> eigenvalues, _ = lobpcg(A, X, Y=Y, M=M, largest=False) + >>> eigenvalues + array([4., 5., 6.]) + + Note that the vectors passed in Y are the eigenvectors of the 3 smallest + eigenvalues. The results returned are orthogonal to those. + """ blockVectorX = X blockVectorY = Y @@ -411,6 +427,8 @@ def lobpcg(A, X, blockVectorBP = None iterationNumber = -1 + restart = True + explicitGramFlag = False while iterationNumber < maxIterations: iterationNumber += 1 if verbosityLevel > 0: @@ -418,13 +436,12 @@ def lobpcg(A, X, if B is not None: aux = blockVectorBX * _lambda[np.newaxis, :] - else: aux = blockVectorX * _lambda[np.newaxis, :] blockVectorR = blockVectorAX - aux - aux = np.sum(blockVectorR.conjugate() * blockVectorR, 0) + aux = np.sum(blockVectorR.conj() * blockVectorR, 0) residualNorms = np.sqrt(aux) residualNormsHistory.append(residualNorms) @@ -468,8 +485,20 @@ def lobpcg(A, X, gramYBY, blockVectorBY, blockVectorY) ## - # B-orthonormalize the preconditioned residuals. + # B-orthogonalize the preconditioned residuals to X. + if B is not None: + activeBlockVectorR = activeBlockVectorR - \ + np.matmul(blockVectorX, + np.matmul(blockVectorBX.T.conj(), + activeBlockVectorR)) + else: + activeBlockVectorR = activeBlockVectorR - \ + np.matmul(blockVectorX, + np.matmul(blockVectorX.T.conj(), + activeBlockVectorR)) + ## + # B-orthonormalize the preconditioned residuals. 
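(The step named in the comment above, B-orthonormalization via Cholesky, mirrors what ``_b_orthonormalize`` does earlier in this patch. A standalone NumPy sketch, using a random symmetric positive definite ``B`` and omitting the normalization and error handling of the real helper:)

import numpy as np
from scipy.linalg import cholesky, inv

rng = np.random.RandomState(0)
n, k = 20, 3
M = rng.rand(n, n)
B = M @ M.T + n * np.eye(n)     # symmetric positive definite
V = rng.rand(n, k)              # block of vectors to B-orthonormalize

VBV = V.T @ (B @ V)
R = cholesky(VBV)               # upper triangular, VBV == R.T @ R
V = V @ inv(R)
assert np.allclose(V.T @ B @ V, np.eye(k))   # columns are now B-orthonormal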
aux = _b_orthonormalize(B, activeBlockVectorR) activeBlockVectorR, activeBlockVectorBR = aux @@ -479,80 +508,112 @@ def lobpcg(A, X, if B is not None: aux = _b_orthonormalize(B, activeBlockVectorP, activeBlockVectorBP, retInvR=True) - activeBlockVectorP, activeBlockVectorBP, invR = aux - activeBlockVectorAP = np.dot(activeBlockVectorAP, invR) - + activeBlockVectorP, activeBlockVectorBP, invR, normal = aux else: aux = _b_orthonormalize(B, activeBlockVectorP, retInvR=True) - activeBlockVectorP, _, invR = aux + activeBlockVectorP, _, invR, normal = aux + # Function _b_orthonormalize returns None if Cholesky fails + if activeBlockVectorP is not None: + activeBlockVectorAP = activeBlockVectorAP / normal activeBlockVectorAP = np.dot(activeBlockVectorAP, invR) + restart = False + else: + restart = True ## # Perform the Rayleigh Ritz Procedure: # Compute symmetric Gram matrices: - if B is not None: - xaw = np.dot(blockVectorX.T.conj(), activeBlockVectorAR) - waw = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR) - xbw = np.dot(blockVectorX.T.conj(), activeBlockVectorBR) - - if iterationNumber > 0: - xap = np.dot(blockVectorX.T.conj(), activeBlockVectorAP) - wap = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAP) - pap = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorAP) - xbp = np.dot(blockVectorX.T.conj(), activeBlockVectorBP) - wbp = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP) - - gramA = bmat([[np.diag(_lambda), xaw, xap], - [xaw.T.conj(), waw, wap], - [xap.T.conj(), wap.T.conj(), pap]]) - - gramB = bmat([[ident0, xbw, xbp], - [xbw.T.conj(), ident, wbp], - [xbp.T.conj(), wbp.T.conj(), ident]]) - else: - gramA = bmat([[np.diag(_lambda), xaw], - [xaw.T.conj(), waw]]) - gramB = bmat([[ident0, xbw], - [xbw.T.conj(), ident]]) - + if activeBlockVectorAR.dtype == 'float32': + myeps = 1 + elif activeBlockVectorR.dtype == 'float32': + myeps = 1e-4 else: - xaw = np.dot(blockVectorX.T.conj(), activeBlockVectorAR) - waw = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR) - xbw = np.dot(blockVectorX.T.conj(), activeBlockVectorR) - - if iterationNumber > 0: - xap = np.dot(blockVectorX.T.conj(), activeBlockVectorAP) - wap = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAP) - pap = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorAP) - xbp = np.dot(blockVectorX.T.conj(), activeBlockVectorP) - wbp = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorP) - - gramA = bmat([[np.diag(_lambda), xaw, xap], - [xaw.T.conj(), waw, wap], - [xap.T.conj(), wap.T.conj(), pap]]) - - gramB = bmat([[ident0, xbw, xbp], - [xbw.T.conj(), ident, wbp], - [xbp.T.conj(), wbp.T.conj(), ident]]) - else: - gramA = bmat([[np.diag(_lambda), xaw], - [xaw.T.conj(), waw]]) - gramB = bmat([[ident0, xbw], - [xbw.T.conj(), ident]]) + myeps = 1e-8 - if verbosityLevel > 0: - _report_nonhermitian(gramA, 3, -1, 'gramA') - _report_nonhermitian(gramB, 3, -1, 'gramB') + if residualNorms.max() > myeps and not explicitGramFlag: + explicitGramFlag = False + else: + # Once explicitGramFlag, forever explicitGramFlag. 
+ explicitGramFlag = True - if verbosityLevel > 10: - _save(gramA, 'gramA') - _save(gramB, 'gramB') + # Shared memory assingments to simplify the code + if B is None: + blockVectorBX = blockVectorX + activeBlockVectorBR = activeBlockVectorR + if not restart: + activeBlockVectorBP = activeBlockVectorP + + # Common submatrices: + gramXAR = np.dot(blockVectorX.T.conj(), activeBlockVectorAR) + gramRAR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR) + + if explicitGramFlag: + gramRAR = (gramRAR + gramRAR.T.conj())/2 + gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX) + gramXAX = (gramXAX + gramXAX.T.conj())/2 + gramXBX = np.dot(blockVectorX.T.conj(), blockVectorBX) + gramRBR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBR) + gramXBR = np.dot(blockVectorX.T.conj(), activeBlockVectorBR) + else: + gramXAX = np.diag(_lambda) + gramXBX = ident0 + gramRBR = ident + gramXBR = np.zeros((sizeX, currentBlockSize), dtype=A.dtype) + + def _handle_gramA_gramB_verbosity(gramA, gramB): + if verbosityLevel > 0: + _report_nonhermitian(gramA, 'gramA') + _report_nonhermitian(gramB, 'gramB') + if verbosityLevel > 10: + # Note: not documented, but leave it in here for now + np.savetxt('gramA.txt', gramA) + np.savetxt('gramB.txt', gramB) + + if not restart: + gramXAP = np.dot(blockVectorX.T.conj(), activeBlockVectorAP) + gramRAP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAP) + gramPAP = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorAP) + gramXBP = np.dot(blockVectorX.T.conj(), activeBlockVectorBP) + gramRBP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP) + if explicitGramFlag: + gramPAP = (gramPAP + gramPAP.T.conj())/2 + gramPBP = np.dot(activeBlockVectorP.T.conj(), + activeBlockVectorBP) + else: + gramPBP = ident + + gramA = bmat([[gramXAX, gramXAR, gramXAP], + [gramXAR.T.conj(), gramRAR, gramRAP], + [gramXAP.T.conj(), gramRAP.T.conj(), gramPAP]]) + gramB = bmat([[gramXBX, gramXBR, gramXBP], + [gramXBR.T.conj(), gramRBR, gramRBP], + [gramXBP.T.conj(), gramRBP.T.conj(), gramPBP]]) + + _handle_gramA_gramB_verbosity(gramA, gramB) + + try: + _lambda, eigBlockVector = eigh(gramA, gramB, + check_finite=False) + except LinAlgError: + # try again after dropping the direction vectors P from RR + restart = True + + if restart: + gramA = bmat([[gramXAX, gramXAR], + [gramXAR.T.conj(), gramRAR]]) + gramB = bmat([[gramXBX, gramXBR], + [gramXBR.T.conj(), gramRBR]]) + + _handle_gramA_gramB_verbosity(gramA, gramB) + + try: + _lambda, eigBlockVector = eigh(gramA, gramB, + check_finite=False) + except LinAlgError: + raise ValueError('eigh has failed in lobpcg iterations') - # Solve the generalized eigenvalue problem. - _lambda, eigBlockVector = eigh(gramA, gramB, check_finite=False) ii = _get_indx(_lambda, sizeX, largest) - if verbosityLevel > 10: print(ii) print(_lambda) @@ -565,7 +626,7 @@ def lobpcg(A, X, if verbosityLevel > 10: print('lambda:', _lambda) # # Normalize eigenvectors! -# aux = np.sum( eigBlockVector.conjugate() * eigBlockVector, 0 ) +# aux = np.sum( eigBlockVector.conj() * eigBlockVector, 0 ) # eigVecNorms = np.sqrt( aux ) # eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :] # eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector ) @@ -575,7 +636,7 @@ def lobpcg(A, X, # Compute Ritz vectors. 
if B is not None: - if iterationNumber > 0: + if not restart: eigBlockVectorX = eigBlockVector[:sizeX] eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize] eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:] @@ -608,7 +669,7 @@ def lobpcg(A, X, blockVectorP, blockVectorAP, blockVectorBP = pp, app, bpp else: - if iterationNumber > 0: + if not restart: eigBlockVectorX = eigBlockVector[:sizeX] eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize] eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:] @@ -642,9 +703,14 @@ def lobpcg(A, X, blockVectorR = blockVectorAX - aux - aux = np.sum(blockVectorR.conjugate() * blockVectorR, 0) + aux = np.sum(blockVectorR.conj() * blockVectorR, 0) residualNorms = np.sqrt(aux) + # Future work: Need to add Postprocessing here: + # Making sure eigenvectors "exactly" satisfy the blockVectorY constrains? + # Making sure eigenvecotrs are "exactly" othonormalized by final "exact" RR + # Computing the actual true residuals + if verbosityLevel > 0: print('final eigenvalue:', _lambda) print('final residual norms:', residualNorms) diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 83ace7d2e76c6..1d7d28a72c2e3 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -38,11 +38,11 @@ def _parse_version(version_string): except ImportError: from scipy.misc import comb, logsumexp # noqa -if sp_version >= (1, 3): +if sp_version >= (1, 4): from scipy.sparse.linalg import lobpcg else: - # Backport of lobpcg functionality from scipy 1.3.0, can be removed - # once support for sp_version < (1, 3) is dropped + # Backport of lobpcg functionality from scipy 1.4.0, can be removed + # once support for sp_version < (1, 4) is dropped from ..externals._lobpcg import lobpcg # noqa if sp_version >= (1, 3): From 27bfcc8d495c6f296756e5a4bcc6665e14627d14 Mon Sep 17 00:00:00 2001 From: DrGFreeman Date: Mon, 16 Sep 2019 07:58:02 -0400 Subject: [PATCH 69/86] FIX implement repr for RepeatedKFold and RepeatedStratifiedKFold (#14983) --- sklearn/model_selection/_split.py | 5 +++++ sklearn/model_selection/tests/test_split.py | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index c49a3ce6aea4e..ceee1a08146ab 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -1163,6 +1163,9 @@ def get_n_splits(self, X=None, y=None, groups=None): **self.cvargs) return cv.get_n_splits(X, y, groups) * self.n_repeats + def __repr__(self): + return _build_repr(self) + class RepeatedKFold(_RepeatedSplits): """Repeated K-Fold cross validator. 
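(With this patch the repeated splitters gain a parameter-based ``repr``. A minimal illustration, with the expected output matching the string asserted by the test added further down:)

from sklearn.model_selection import RepeatedKFold

cv = RepeatedKFold(n_splits=2, n_repeats=6)
print(repr(cv))
# RepeatedKFold(n_repeats=6, n_splits=2, random_state=None)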
@@ -2158,6 +2161,8 @@ def _build_repr(self): try: with warnings.catch_warnings(record=True) as w: value = getattr(self, key, None) + if value is None and hasattr(self, 'cvargs'): + value = self.cvargs.get(key, None) if len(w) and w[0].category == DeprecationWarning: # if the parameter is deprecated, don't show it continue diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index a38fb435c9db4..4aa47a753bb1d 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -980,6 +980,17 @@ def test_repeated_cv_value_errors(): assert_raises(ValueError, cv, n_repeats=1.5) +@pytest.mark.parametrize( + "RepeatedCV", [RepeatedKFold, RepeatedStratifiedKFold] +) +def test_repeated_cv_repr(RepeatedCV): + n_splits, n_repeats = 2, 6 + repeated_cv = RepeatedCV(n_splits=n_splits, n_repeats=n_repeats) + repeated_cv_repr = ('{}(n_repeats=6, n_splits=2, random_state=None)' + .format(repeated_cv.__class__.__name__)) + assert repeated_cv_repr == repr(repeated_cv) + + def test_repeated_kfold_determinstic_split(): X = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] random_state = 258173307 From a846badbd18d153e616b13346a2ad2f48005f9b8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 16 Sep 2019 14:24:30 +0200 Subject: [PATCH 70/86] address comments from Nicolas --- sklearn/inspection/partial_dependence.py | 8 +++++--- sklearn/inspection/tests/test_partial_dependence.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index f7cd230b7a637..ced6600c358e9 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -361,10 +361,12 @@ def partial_dependence(estimator, X, features, response_method='auto', if _determine_key_type(features) == 'int': raise_err = False - if isinstance(features, Iterable): + if (isinstance(features, Iterable) or + isinstance(features, numbers.Integral)): + # _get_column_indices() support negative indexing. Here, we limit + # the indexing to be positive. 
The upper bound will be checked + # by _get_column_indices() raise_err = np.all(np.less(features, 0)) - elif isinstance(features, numbers.Integral): - raise_err = features < 0 if raise_err: raise ValueError( diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 36d63cf48e062..622cf0804e807 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -368,10 +368,10 @@ def test_partial_dependence_unknown_feature_string(estimator): df = pd.DataFrame(X) estimator.fit(df, y) - features = 'random' + features = ['random'] err_msg = 'A given column is not a column of the dataframe' with pytest.raises(ValueError, match=err_msg): - partial_dependence(estimator, df, [features]) + partial_dependence(estimator, df, features) @pytest.mark.parametrize( From 09e589904c13408264f433e383cab96b152f0a96 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 20 Sep 2019 11:47:07 +0200 Subject: [PATCH 71/86] support indices in tuple in safe_indexing --- .../tests/test_plot_partial_dependence.py | 13 +++++++++++ sklearn/utils/__init__.py | 8 +++++-- sklearn/utils/tests/test_utils.py | 22 ++++++++++++++----- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/sklearn/inspection/tests/test_plot_partial_dependence.py b/sklearn/inspection/tests/test_plot_partial_dependence.py index bc0568b058be4..29150168bdba5 100644 --- a/sklearn/inspection/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/tests/test_plot_partial_dependence.py @@ -303,6 +303,19 @@ def test_plot_partial_dependence_multioutput(pyplot, target): assert ax.get_xlabel() == "{}".format(i) +def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston): + pd = pytest.importorskip('pandas') + df = pd.DataFrame(boston.data, columns=boston.feature_names) + y = boston.target + + grid_resolution = 25 + + plot_partial_dependence( + clf_boston, df, ['TAX', 'AGE'], grid_resolution=grid_resolution, + feature_names=df.columns.tolist() + ) + + dummy_classification_data = make_classification(random_state=0) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index c26fab41bfc93..22e968d3ea38c 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -188,6 +188,8 @@ def _array_indexing(array, key, key_dtype, axis): # check if we have an boolean array-likes to make the proper indexing if key_dtype == 'bool': key = np.asarray(key) + if isinstance(key, tuple): + key = list(key) return array[key] if axis == 0 else array[:, key] @@ -198,6 +200,8 @@ def _pandas_indexing(X, key, key_dtype, axis): # FIXME: solved in pandas 0.25 key = np.asarray(key) key = key if key.flags.writeable else key.copy() + elif isinstance(key, tuple): + key = list(key) # check whether we should index with loc or iloc indexer = X.iloc if key_dtype == 'int' else X.loc return indexer[:, key] if axis else indexer[key] @@ -254,7 +258,7 @@ def _determine_key_type(key): if key_start_type is not None: return key_start_type return key_stop_type - if isinstance(key, list): + if isinstance(key, (list, tuple)): unique_key = set(key) key_type = {_determine_key_type(elt) for elt in unique_key} if not key_type: @@ -352,7 +356,7 @@ def _get_column_indices(X, key): key_dtype = _determine_key_type(key) - if isinstance(key, list) and not key: + if isinstance(key, (list, tuple)) and not key: # we get an empty list return [] elif key_dtype in ('bool', 'int'): diff --git a/sklearn/utils/tests/test_utils.py 
b/sklearn/utils/tests/test_utils.py index 363b77a44b5fc..b96702985aa1b 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -205,15 +205,19 @@ def test_column_or_1d(): (np.bool_(True), 'bool'), ([0, 1, 2], 'int'), (['0', '1', '2'], 'str'), + ((0, 1, 2), 'int'), + (('0', '1', '2'), 'str'), (slice(None, None), None), (slice(0, 2), 'int'), (np.array([0, 1, 2], dtype=np.int32), 'int'), (np.array([0, 1, 2], dtype=np.int64), 'int'), (np.array([0, 1, 2], dtype=np.uint8), 'int'), ([True, False], 'bool'), + ((True, False), 'bool'), (np.array([True, False]), 'bool'), ('col_0', 'str'), (['col_0', 'col_1', 'col_2'], 'str'), + (('col_0', 'col_1', 'col_2'), 'str'), (slice('begin', 'end'), 'str'), (np.array(['col_0', 'col_1', 'col_2']), 'str'), (np.array(['col_0', 'col_1', 'col_2'], dtype=object), 'str')] @@ -230,6 +234,8 @@ def test_determine_key_type_error(): def _convert_container(container, constructor_name, columns_name=None): if constructor_name == 'list': return list(container) + elif constructor_name == 'tuple': + return tuple(container) elif constructor_name == 'array': return np.asarray(container) elif constructor_name == 'sparse': @@ -247,7 +253,9 @@ def _convert_container(container, constructor_name, columns_name=None): @pytest.mark.parametrize( "array_type", ["list", "array", "sparse", "dataframe"] ) -@pytest.mark.parametrize("indices_type", ["list", "array", "series", "slice"]) +@pytest.mark.parametrize( + "indices_type", ["list", "tuple", "array", "series", "slice"] +) def test_safe_indexing_2d_container_axis_0(array_type, indices_type): indices = [1, 2] if indices_type == 'slice' and isinstance(indices[1], int): @@ -261,7 +269,9 @@ def test_safe_indexing_2d_container_axis_0(array_type, indices_type): @pytest.mark.parametrize("array_type", ["list", "array", "series"]) -@pytest.mark.parametrize("indices_type", ["list", "array", "series", "slice"]) +@pytest.mark.parametrize( + "indices_type", ["list", "tuple", "array", "series", "slice"] +) def test_safe_indexing_1d_container(array_type, indices_type): indices = [1, 2] if indices_type == 'slice' and isinstance(indices[1], int): @@ -275,7 +285,9 @@ def test_safe_indexing_1d_container(array_type, indices_type): @pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe"]) -@pytest.mark.parametrize("indices_type", ["list", "array", "series", "slice"]) +@pytest.mark.parametrize( + "indices_type", ["list", "tuple", "array", "series", "slice"] +) @pytest.mark.parametrize("indices", [[1, 2], ["col_1", "col_2"]]) def test_safe_indexing_2d_container_axis_1(array_type, indices_type, indices): # validation of the indices @@ -328,7 +340,7 @@ def test_safe_indexing_2d_read_only_axis_1(array_read_only, indices_read_only, @pytest.mark.parametrize("array_type", ["list", "array", "series"]) -@pytest.mark.parametrize("indices_type", ["list", "array", "series"]) +@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"]) def test_safe_indexing_1d_container_mask(array_type, indices_type): indices = [False] + [True] * 2 + [False] * 6 array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type) @@ -340,7 +352,7 @@ def test_safe_indexing_1d_container_mask(array_type, indices_type): @pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe"]) -@pytest.mark.parametrize("indices_type", ["list", "array", "series"]) +@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"]) @pytest.mark.parametrize( "axis, expected_subset", [(0, [[4, 5, 6], [7, 8, 9]]), From 
56455ee8a5d48ced4e665a829ce74578bb0d40ce Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 23 Sep 2019 10:02:55 +0200 Subject: [PATCH 72/86] PEP8 --- sklearn/inspection/tests/test_plot_partial_dependence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/inspection/tests/test_plot_partial_dependence.py b/sklearn/inspection/tests/test_plot_partial_dependence.py index 29150168bdba5..1933063e40f79 100644 --- a/sklearn/inspection/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/tests/test_plot_partial_dependence.py @@ -306,7 +306,6 @@ def test_plot_partial_dependence_multioutput(pyplot, target): def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston): pd = pytest.importorskip('pandas') df = pd.DataFrame(boston.data, columns=boston.feature_names) - y = boston.target grid_resolution = 25 From c8c2a08c5e1a34aee9480fa4890c59f83ff5ac5a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 21 Oct 2019 11:45:53 +0200 Subject: [PATCH 73/86] reviews --- sklearn/inspection/partial_dependence.py | 16 +++++++++------- .../inspection/tests/test_partial_dependence.py | 3 ++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index d0a556d923f8b..039df5eb2dbe3 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -298,7 +298,10 @@ def partial_dependence(estimator, X, features, response_method='auto', ) if isinstance(estimator, Pipeline): + # assuming Pipeline si the only estimator that does not store a new + # attribute for est in estimator: + # FIXME: remove the None option when it will be deprecated if est not in (None, 'drop'): check_is_fitted(est) else: @@ -310,6 +313,8 @@ def partial_dependence(estimator, X, features, response_method='auto', 'Multiclass-multioutput estimators are not supported' ) + # Use check_array only on lists and other non-array-likes / sparse. Do not + # convert DataFrame into a NumPy array. if not(hasattr(X, '__array__') or sparse.issparse(X)): X = check_array(X, force_all_finite='allow-nan', dtype=np.object) @@ -363,18 +368,15 @@ def partial_dependence(estimator, X, features, response_method='auto', ) if _determine_key_type(features) == 'int': - raise_err = False if (isinstance(features, Iterable) or isinstance(features, numbers.Integral)): # _get_column_indices() support negative indexing. Here, we limit # the indexing to be positive. 
The upper bound will be checked # by _get_column_indices() - raise_err = np.all(np.less(features, 0)) - - if raise_err: - raise ValueError( - 'all features must be in [0, {}]'.format(X.shape[1] - 1) - ) + if np.any(np.less(features, 0)): + raise ValueError( + 'all features must be in [0, {}]'.format(X.shape[1] - 1) + ) features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index caef8b97ade07..3fe3bc2cdbe8e 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -474,7 +474,8 @@ def test_partial_dependence_pipeline(): ids=['features-integer', 'features-string'] ) def test_partial_dependence_dataframe(estimator, preprocessor, features): - # check that the partial dependence support dataframe + # check that the partial dependence support dataframe and pipeline + # including a column transformer pd = pytest.importorskip("pandas") df = pd.DataFrame(iris.data, columns=iris.feature_names) From 4d427aa91791fd2ce499c354a186cc2426ce4f01 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 21 Oct 2019 12:08:25 +0200 Subject: [PATCH 74/86] safe_indexing is private --- sklearn/inspection/partial_dependence.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/inspection/partial_dependence.py b/sklearn/inspection/partial_dependence.py index 039df5eb2dbe3..46be2970c3f7d 100644 --- a/sklearn/inspection/partial_dependence.py +++ b/sklearn/inspection/partial_dependence.py @@ -21,7 +21,7 @@ from ..utils.extmath import cartesian from ..utils import check_array from ..utils import check_matplotlib_support # noqa -from ..utils import safe_indexing +from ..utils import _safe_indexing from ..utils import _determine_key_type from ..utils import _get_column_indices from ..utils.validation import check_is_fitted @@ -79,14 +79,14 @@ def _grid_from_X(X, percentiles, grid_resolution): values = [] for feature in range(X.shape[1]): - uniques = np.unique(safe_indexing(X, feature, axis=1)) + uniques = np.unique(_safe_indexing(X, feature, axis=1)) if uniques.shape[0] < grid_resolution: # feature has low resolution use unique vals axis = uniques else: # create axis based on percentiles and grid resolution emp_percentiles = mquantiles( - safe_indexing(X, feature, axis=1), prob=percentiles, axis=0 + _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0 ) if np.allclose(emp_percentiles[0], emp_percentiles[1]): raise ValueError( @@ -383,7 +383,7 @@ def partial_dependence(estimator, X, features, response_method='auto', ).ravel() grid, values = _grid_from_X( - safe_indexing(X, features_indices, axis=1), percentiles, + _safe_indexing(X, features_indices, axis=1), percentiles, grid_resolution ) From b7c684432a56ca3d6dbc256cec7bc9f06f6a8874 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 23 Oct 2019 16:52:06 +0200 Subject: [PATCH 75/86] fix comments --- sklearn/inspection/_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index 46be2970c3f7d..fb568040dc45c 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -370,7 +370,7 @@ def partial_dependence(estimator, X, features, response_method='auto', if _determine_key_type(features) == 'int': if (isinstance(features, Iterable) or 
isinstance(features, numbers.Integral)): - # _get_column_indices() support negative indexing. Here, we limit + # _get_column_indices() supports negative indexing. Here, we limit # the indexing to be positive. The upper bound will be checked # by _get_column_indices() if np.any(np.less(features, 0)): From 2579ef71b957417fd55108e67f5385c97bbe70f3 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 24 Oct 2019 14:20:47 +0200 Subject: [PATCH 76/86] iter --- sklearn/_build_utils/deprecated_modules.py | 2 -- sklearn/inspection/tests/test_permutation_importance.py | 1 - 2 files changed, 3 deletions(-) diff --git a/sklearn/_build_utils/deprecated_modules.py b/sklearn/_build_utils/deprecated_modules.py index cf2bbe2606558..3069aafae0f58 100644 --- a/sklearn/_build_utils/deprecated_modules.py +++ b/sklearn/_build_utils/deprecated_modules.py @@ -143,8 +143,6 @@ ('_partial_dependence', 'sklearn.inspection.partial_dependence', 'sklearn.inspection', 'partial_dependence'), - ('_permutation_importance', 'sklearn.inspection.permutation_importance', - 'sklearn.inspection', 'permutation_importance'), ('_ball_tree', 'sklearn.neighbors.ball_tree', 'sklearn.neighbors', 'BallTree'), diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py index b444310695dee..671a1e11b1fec 100644 --- a/sklearn/inspection/tests/test_permutation_importance.py +++ b/sklearn/inspection/tests/test_permutation_importance.py @@ -18,7 +18,6 @@ from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import scale - @pytest.mark.parametrize("n_jobs", [1, 2]) def test_permutation_importance_correlated_feature_regression(n_jobs): # Make sure that feature highly correlated to the target have a higher From a5777ad57b81a307c92878c4688e8a2b04c9e2dd Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 24 Oct 2019 18:44:12 +0200 Subject: [PATCH 77/86] reduce list of estimator to check for fitness --- sklearn/inspection/tests/test_partial_dependence.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index a2df49985516c..db8d29773404c 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -542,17 +542,10 @@ def test_partial_dependence_feature_type(features, expected_pd_shape): @pytest.mark.parametrize( - "name, Estimator", all_estimators(type_filter=['classifier', 'regressor']) + "estimator", [LinearRegression(), LogisticRegression(), + GradientBoostingRegressor(), GradientBoostingClassifier()] ) -def test_partial_dependence_unfitted(name, Estimator): - try: - estimator = Estimator() - except TypeError: - raise SkipTest( - 'The {} estimator cannot be built with default parameters' - .format(name) - ) - +def test_partial_dependence_unfitted(estimator): X = iris.data preprocessor = make_column_transformer( (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3]) From 0aa3cd901ec0d6c42f6d6ca1373cf61b1a75da5b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 24 Oct 2019 18:45:08 +0200 Subject: [PATCH 78/86] remove unused import --- sklearn/inspection/tests/test_partial_dependence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index db8d29773404c..2f8d9faf7bc17 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py 
+++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -33,7 +33,6 @@ from sklearn.dummy import DummyClassifier from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.exceptions import NotFittedError -from sklearn.utils.testing import all_estimators from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import SkipTest From dc56f7b0b780cb4da790704ca98445ab692fa664 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 24 Oct 2019 18:47:01 +0200 Subject: [PATCH 79/86] fix --- sklearn/inspection/tests/test_partial_dependence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 2f8d9faf7bc17..104451aa75457 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -35,7 +35,6 @@ from sklearn.exceptions import NotFittedError from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import SkipTest # toy sample From e1de4a4572bc1d0b121198d89d6771f53fddec7b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 24 Oct 2019 22:40:03 +0200 Subject: [PATCH 80/86] address thomas comments --- sklearn/inspection/_partial_dependence.py | 3 ++- sklearn/inspection/tests/test_partial_dependence.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index fb568040dc45c..9538c3b67521e 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -298,7 +298,8 @@ def partial_dependence(estimator, X, features, response_method='auto', ) if isinstance(estimator, Pipeline): - # assuming Pipeline si the only estimator that does not store a new + # TODO: to be removed if/when pipeline get a `steps_` attributes + # assuming Pipeline is the only estimator that does not store a new # attribute for est in estimator: # FIXME: remove the None option when it will be deprecated diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 104451aa75457..b278c7d1e5401 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -463,8 +463,11 @@ def test_partial_dependence_pipeline(): [None, make_column_transformer( (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]), - (RobustScaler(), [iris.feature_names[i] for i in (1, 3)]))], - ids=['None', 'column-transformer'] + (RobustScaler(), [iris.feature_names[i] for i in (1, 3)])), + make_column_transformer( + (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]), + remainder='passthrough')], + ids=['None', 'column-transformer', 'column-transformer-passthrough'] ) @pytest.mark.parametrize( "features", From 53cdf4a35c2a8328ffce3c29bc1c75e8d9ec1bc3 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 28 Oct 2019 15:57:33 +0100 Subject: [PATCH 81/86] remove support for slice --- sklearn/inspection/_partial_dependence.py | 28 +++++++++++++------ .../tests/test_partial_dependence.py | 19 +++++++++++-- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index fb568040dc45c..b5c2d20d52c00 100644 --- 
a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -367,16 +367,26 @@ def partial_dependence(estimator, X, features, response_method='auto', "'decision_function'. Got {}.".format(response_method) ) + if isinstance(features, slice): + err_msg = ("Unsupported type for the parameter 'features'. The " + "expected type is {}. Got {!r} instead.") + if hasattr(X, "iloc"): + specified_msg = ("a column name or an integer column indice or an " + "array-like containing column names or integer " + "column indices") + else: + specified_msg = ("an integer column indice or an array-like " + "containing integer column indices") + raise ValueError(err_msg.format(specified_msg, features)) + if _determine_key_type(features) == 'int': - if (isinstance(features, Iterable) or - isinstance(features, numbers.Integral)): - # _get_column_indices() supports negative indexing. Here, we limit - # the indexing to be positive. The upper bound will be checked - # by _get_column_indices() - if np.any(np.less(features, 0)): - raise ValueError( - 'all features must be in [0, {}]'.format(X.shape[1] - 1) - ) + # _get_column_indices() supports negative indexing. Here, we limit + # the indexing to be positive. The upper bound will be checked + # by _get_column_indices() + if np.any(np.less(features, 0)): + raise ValueError( + 'all features must be in [0, {}]'.format(X.shape[1] - 1) + ) features_indices = np.asarray( _get_column_indices(X, features), dtype=np.int32, order='C' diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 104451aa75457..7d49f097254d3 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -341,6 +341,22 @@ def test_partial_dependence_error(estimator, params, err_msg): partial_dependence(estimator, X, **params) +@pytest.mark.parametrize( + "with_dataframe, err_msg", + [(True, "a column name or an integer column indice"), + (False, "an integer column indice or an array-like")] +) +def test_partial_dependence_slice_error(with_dataframe, err_msg): + X, y = make_classification(random_state=0) + if with_dataframe: + pd = pytest.importorskip('pandas') + X = pd.DataFrame(X) + estimator = LogisticRegression().fit(X, y) + + with pytest.raises(ValueError, match=err_msg): + partial_dependence(estimator, X, features=slice(0, 2, 1)) + + @pytest.mark.parametrize( 'estimator', [LinearRegression(), GradientBoostingClassifier(random_state=0)] @@ -515,9 +531,8 @@ def test_partial_dependence_dataframe(estimator, preprocessor, features): (iris.feature_names[0], (3, 10)), ([0, 2], (3, 10, 10)), ([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)), - (slice(0, 2, 1), (3, 10, 10)), ([True, False, True, False], (3, 10, 10))], - ids=['scalar-int', 'scalar-str', 'list-int', 'list-str', 'slice', 'mask'] + ids=['scalar-int', 'scalar-str', 'list-int', 'list-str', 'mask'] ) def test_partial_dependence_feature_type(features, expected_pd_shape): # check all possible features type supported in PDP From fa9f04a44711a1037ddb3e545dcdff08fc6c2aca Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 30 Oct 2019 10:58:15 +0100 Subject: [PATCH 82/86] add accept_slice to _determine_key_dtype --- sklearn/inspection/_partial_dependence.py | 14 +------------- .../inspection/tests/test_partial_dependence.py | 6 +++--- sklearn/utils/__init__.py | 9 ++++++++- sklearn/utils/tests/test_utils.py | 5 +++++ 4 files changed, 17 insertions(+), 17 deletions(-) diff 
--git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index 099af0a4b8d9f..90cd94ae8852c 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -368,19 +368,7 @@ def partial_dependence(estimator, X, features, response_method='auto', "'decision_function'. Got {}.".format(response_method) ) - if isinstance(features, slice): - err_msg = ("Unsupported type for the parameter 'features'. The " - "expected type is {}. Got {!r} instead.") - if hasattr(X, "iloc"): - specified_msg = ("a column name or an integer column indice or an " - "array-like containing column names or integer " - "column indices") - else: - specified_msg = ("an integer column indice or an array-like " - "containing integer column indices") - raise ValueError(err_msg.format(specified_msg, features)) - - if _determine_key_type(features) == 'int': + if _determine_key_type(features, accept_slice=False) == 'int': # _get_column_indices() supports negative indexing. Here, we limit # the indexing to be positive. The upper bound will be checked # by _get_column_indices() diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 8646a1d97b645..8d3194f34249f 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -343,8 +343,8 @@ def test_partial_dependence_error(estimator, params, err_msg): @pytest.mark.parametrize( "with_dataframe, err_msg", - [(True, "a column name or an integer column indice"), - (False, "an integer column indice or an array-like")] + [(True, "Only array-like or scalar are supported"), + (False, "Only array-like or scalar are supported")] ) def test_partial_dependence_slice_error(with_dataframe, err_msg): X, y = make_classification(random_state=0) @@ -353,7 +353,7 @@ def test_partial_dependence_slice_error(with_dataframe, err_msg): X = pd.DataFrame(X) estimator = LogisticRegression().fit(X, y) - with pytest.raises(ValueError, match=err_msg): + with pytest.raises(TypeError, match=err_msg): partial_dependence(estimator, X, features=slice(0, 2, 1)) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index d2d1bc3435add..923eba7a09065 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -223,13 +223,15 @@ def _list_indexing(X, key, key_dtype): return [X[idx] for idx in key] -def _determine_key_type(key): +def _determine_key_type(key, accept_slice=True): """Determine the data type of key. Parameters ---------- key : scalar, slice or array-like The key from which we want to infer the data type. + accept_slice : bool, default=True + Whether or not to raise an error if the key is a slice. Returns ------- @@ -252,6 +254,11 @@ def _determine_key_type(key): except KeyError: raise ValueError(err_msg) if isinstance(key, slice): + if not accept_slice: + raise TypeError( + 'Only array-like or scalar are supported. ' + 'A Python slice was given.' 
+ ) if key.start is None and key.stop is None: return None key_start_type = _determine_key_type(key.start) diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 55ae8d373ea1f..2cf1e59a73f29 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -231,6 +231,11 @@ def test_determine_key_type_error(): _determine_key_type(1.0) +def test_determine_key_type_slice_error(): + with pytest.raises(TypeError, match="Only array-like or scalar are"): + _determine_key_type(slice(0, 2, 1), accept_slice=False) + + def _convert_container(container, constructor_name, columns_name=None): if constructor_name == 'list': return list(container) From f7f7096b9f8a74869c8f22b4a7b59ae5763de5bd Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 30 Oct 2019 16:55:25 +0100 Subject: [PATCH 83/86] docstring --- sklearn/utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 923eba7a09065..4d4ef606341ca 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -230,6 +230,7 @@ def _determine_key_type(key, accept_slice=True): ---------- key : scalar, slice or array-like The key from which we want to infer the data type. + accept_slice : bool, default=True Whether or not to raise an error if the key is a slice. From 8029cf4f6896d70c4025504805c39a065efc39ff Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 30 Oct 2019 17:56:04 +0100 Subject: [PATCH 84/86] docstring --- sklearn/inspection/_partial_dependence.py | 30 +++++++++++++++++------ 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index 90cd94ae8852c..a2475e272dbce 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -49,9 +49,11 @@ def _grid_from_X(X, percentiles, grid_resolution): ---------- X : ndarray, shape (n_samples, n_target_features) The data + percentiles : tuple of floats The percentiles which are used to construct the extreme values of the grid. Must be in [0, 1]. + grid_resolution : int The number of equally spaced points to be placed on the grid for each feature. @@ -61,6 +63,7 @@ def _grid_from_X(X, percentiles, grid_resolution): grid : ndarray, shape (n_points, n_target_features) A value for each feature at each point in the grid. ``n_points`` is always ``<= grid_resolution ** X.shape[1]``. + values : list of 1d ndarrays The values with which the grid has been created. The size of each array ``values[j]`` is either ``grid_resolution``, or the number of @@ -191,13 +194,16 @@ def partial_dependence(estimator, X, features, response_method='auto', A fitted estimator object implementing :term:`predict`, :term:`predict_proba`, or :term:`decision_function`. Multioutput-multiclass classifiers are not supported. + X : {array-like or dataframe} of shape (n_samples, n_features) ``X`` is used both to generate a grid of values for the ``features``, and to compute the averaged predictions when method is 'brute'. + features : array-like of {int, str} - The target features for which the partial dependency should be - computed. + The feature (e.g. `[0]`) or pair of interacting features + (e.g. `[(0, 1)]`) for which the partial dependency should be computed. 
+ response_method : 'auto', 'predict_proba' or 'decision_function', \ optional (default='auto') Specifies whether to use :term:`predict_proba` or @@ -207,12 +213,15 @@ def partial_dependence(estimator, X, features, response_method='auto', and we revert to :term:`decision_function` if it doesn't exist. If ``method`` is 'recursion', the response is always the output of :term:`decision_function`. + percentiles : tuple of float, optional (default=(0.05, 0.95)) The lower and upper percentile used to create the extreme values for the grid. Must be in [0, 1]. + grid_resolution : int, optional (default=100) The number of equally spaced points on the grid, for each target feature. + method : str, optional (default='auto') The method used to calculate the averaged predictions: @@ -224,7 +233,7 @@ def partial_dependence(estimator, X, features, response_method='auto', but is more efficient in terms of speed. With this method, ``X`` is only used to build the grid and the partial dependences are computed using the training - data. This method does not account for the ``init`` predicor of + data. This method does not account for the ``init`` predictor of the boosting process, which may lead to incorrect values (see warning below). With this method, the target response of a classifier is always the decision function, not the predicted @@ -256,6 +265,7 @@ def partial_dependence(estimator, X, features, response_method='auto', regression. For classical regression and binary classification ``n_outputs==1``. ``n_values_feature_j`` corresponds to the size ``values[j]``. + values : seq of 1d ndarrays The values with which the grid has been created. The generated grid is a cartesian product of the arrays in ``values``. ``len(values) == @@ -423,6 +433,7 @@ def plot_partial_dependence(estimator, X, features, feature_names=None, A fitted estimator object implementing :term:`predict`, :term:`predict_proba`, or :term:`decision_function`. Multioutput-multiclass classifiers are not supported. + X : {array-like or dataframe} of shape (n_samples, n_features) The data to use to build the grid of values on which the dependence will be evaluated. This is usually the training data. @@ -481,7 +492,7 @@ def plot_partial_dependence(estimator, X, features, feature_names=None, but is more efficient in terms of speed. With this method, ``X`` is optional and is only used to build the grid and the partial dependences are computed using the training - data. This method does not account for the ``init`` predicor of + data. This method does not account for the ``init`` predictor of the boosting process, which may lead to incorrect values (see warning below. With this method, the target response of a classifier is always the decision function, not the predicted @@ -520,7 +531,7 @@ def plot_partial_dependence(estimator, X, features, feature_names=None, ax : Matplotlib axes or array-like of Matplotlib axes, default=None - If a single axis is passed in, it is treated as a bounding axes - and a grid of partial depedendence plots will be drawn within + and a grid of partial dependence plots will be drawn within these bounds. The `n_cols` parameter controls the number of columns in the grid. - If an array-like of axes are passed in, the partial dependence @@ -581,12 +592,15 @@ def plot_partial_dependence(estimator, X, features, feature_names=None, # regression and binary classification target_idx = 0 + # Use check_array only on lists and other non-array-likes / sparse. Do not + # convert DataFrame into a NumPy array. 
X = check_array(X) n_features = X.shape[1] # convert feature_names to list if feature_names is None: - # if feature_names is None, use feature indices as name + # if feature_names is None, use feature indices as name for NumPy array + # or the column names for a dataframe feature_names = [str(i) for i in range(n_features)] elif isinstance(feature_names, np.ndarray): feature_names = feature_names.tolist() @@ -709,7 +723,7 @@ class PartialDependenceDisplay: plot a two-way partial dependence curve as a contour plot. feature_names : list of str - Feature names corrsponding to the indicies in ``features``. + Feature names corresponding to the indices in ``features``. target_idx : int @@ -777,7 +791,7 @@ def plot(self, ax=None, n_cols=3, line_kw=None, contour_kw=None): ---------- ax : Matplotlib axes or array-like of Matplotlib axes, default=None - If a single axis is passed in, it is treated as a bounding axes - and a grid of partial depedendence plots will be drawn within + and a grid of partial dependence plots will be drawn within these bounds. The `n_cols` parameter controls the number of columns in the grid. - If an array-like of axes are passed in, the partial dependence From a187e0cf36da74c4709d14c60a63979dcee77daf Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 30 Oct 2019 17:59:41 +0100 Subject: [PATCH 85/86] docstring --- sklearn/inspection/_partial_dependence.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index a2475e272dbce..75cfdb10a621d 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -592,15 +592,12 @@ def plot_partial_dependence(estimator, X, features, feature_names=None, # regression and binary classification target_idx = 0 - # Use check_array only on lists and other non-array-likes / sparse. Do not - # convert DataFrame into a NumPy array. 
X = check_array(X) n_features = X.shape[1] # convert feature_names to list if feature_names is None: - # if feature_names is None, use feature indices as name for NumPy array - # or the column names for a dataframe + # if feature_names is None, use feature indices as name feature_names = [str(i) for i in range(n_features)] elif isinstance(feature_names, np.ndarray): feature_names = feature_names.tolist() From 46aea9353fa45db45ea8a25fc5f5370d093be71b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Wed, 30 Oct 2019 18:13:42 +0100 Subject: [PATCH 86/86] update example --- .../inspection/plot_partial_dependence.py | 24 +++++++++++-------- ...ot_partial_dependence_visualization_api.py | 13 +++++----- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index 0d79401e3f662..d7564d5ec95c7 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -30,6 +30,7 @@ from time import time import numpy as np +import pandas as pd import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D @@ -54,8 +55,8 @@ # (here the average target, by default) cal_housing = fetch_california_housing() -names = cal_housing.feature_names -X, y = cal_housing.data, cal_housing.target +X = pd.DataFrame(cal_housing.data, columns=cal_housing.feature_names) +y = cal_housing.target y -= y.mean() @@ -104,8 +105,9 @@ tic = time() # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower # with the brute method. -features = [0, 5, 1, 2] -plot_partial_dependence(est, X_train, features, feature_names=names, +features = ['MedInc', 'AveOccup', 'HouseAge', 'AveRooms'] +plot_partial_dependence(est, X_train, features, + feature_names=X_train.columns.tolist(), n_jobs=3, grid_resolution=20) print("done in {:.3f}s".format(time() - tic)) fig = plt.gcf() @@ -143,8 +145,10 @@ print('Computing partial dependence plots...') tic = time() -features = [0, 5, 1, 2, (5, 1)] -plot_partial_dependence(est, X_train, features, feature_names=names, +features = ['MedInc', 'AveOccup', 'HouseAge', 'AveRooms', + ('AveOccup', 'HouseAge')] +plot_partial_dependence(est, X_train, features, + feature_names=X_train.columns.tolist(), n_jobs=3, grid_resolution=20) print("done in {:.3f}s".format(time() - tic)) fig = plt.gcf() @@ -192,16 +196,16 @@ fig = plt.figure() -target_feature = (1, 5) -pdp, axes = partial_dependence(est, X_train, target_feature, +features = ('AveOccup', 'HouseAge') +pdp, axes = partial_dependence(est, X_train, features=features, grid_resolution=20) XX, YY = np.meshgrid(axes[0], axes[1]) Z = pdp[0].T ax = Axes3D(fig) surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1, cmap=plt.cm.BuPu, edgecolor='k') -ax.set_xlabel(names[target_feature[0]]) -ax.set_ylabel(names[target_feature[1]]) +ax.set_xlabel(features[0]) +ax.set_ylabel(features[1]) ax.set_zlabel('Partial dependence') # pretty init view ax.view_init(elev=22, azim=122) diff --git a/examples/plot_partial_dependence_visualization_api.py b/examples/plot_partial_dependence_visualization_api.py index 8884d52f80d25..911a2409efe0b 100644 --- a/examples/plot_partial_dependence_visualization_api.py +++ b/examples/plot_partial_dependence_visualization_api.py @@ -15,6 +15,7 @@ """ print(__doc__) +import pandas as pd import matplotlib.pyplot as plt from sklearn.datasets import load_boston from sklearn.neural_network import MLPRegressor @@ -32,8 +33,8 @@ # housing price dataset. 
boston = load_boston() -X, y = boston.data, boston.target -feature_names = boston.feature_names +X = pd.DataFrame(boston.data, columns=boston.feature_names) +y = boston.target tree = DecisionTreeRegressor() mlp = make_pipeline(StandardScaler(), @@ -55,7 +56,7 @@ fig, ax = plt.subplots(figsize=(12, 6)) ax.set_title("Decision Tree") tree_disp = plot_partial_dependence(tree, X, ["LSTAT", "RM"], - feature_names=feature_names, ax=ax) + feature_names=X.columns.tolist(), ax=ax) ############################################################################## # The partial depdendence curves can be plotted for the multi-layer perceptron. @@ -65,7 +66,7 @@ fig, ax = plt.subplots(figsize=(12, 6)) ax.set_title("Multi-layer Perceptron") mlp_disp = plot_partial_dependence(mlp, X, ["LSTAT", "RM"], - feature_names=feature_names, ax=ax, + feature_names=X.columns.tolist(), ax=ax, line_kw={"c": "red"}) ############################################################################## @@ -134,7 +135,7 @@ # the same axes. In this case, `tree_disp.axes_` is passed into the second # plot function. tree_disp = plot_partial_dependence(tree, X, ["LSTAT"], - feature_names=feature_names) + feature_names=X.columns.tolist()) mlp_disp = plot_partial_dependence(mlp, X, ["LSTAT"], - feature_names=feature_names, + feature_names=X.columns.tolist(), ax=tree_disp.axes_, line_kw={"c": "red"})
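
Taken together, the patches above let partial_dependence accept a fitted Pipeline and a pandas DataFrame, with `features` given by column name and column-wise preprocessing handled by a ColumnTransformer. The sketch below exercises that path; it mirrors test_partial_dependence_dataframe and test_partial_dependence_pipeline from this series, except that the final estimator (LogisticRegression(max_iter=1000)) is an illustrative choice rather than one fixed by the tests, and it assumes a scikit-learn build with these patches applied plus pandas installed.

import pandas as pd

from sklearn.compose import make_column_transformer
from sklearn.datasets import load_iris
from sklearn.inspection import partial_dependence
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Columns are selected by name in the ColumnTransformer, as in the tests above;
# the remaining columns are passed through untouched.
preprocessor = make_column_transformer(
    (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
    remainder='passthrough'
)
pipe = make_pipeline(preprocessor, LogisticRegression(max_iter=1000))
pipe.fit(df, iris.target)

# With a dataframe, `features` can now be a column name (or a list of them);
# the dataframe is not converted to a NumPy array before being sliced.
pdp, values = partial_dependence(
    pipe, df, features=[iris.feature_names[0]], grid_resolution=10
)
print(pdp.shape, values[0].shape)  # one curve per class over the feature grid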