From a501ca1118ba45f0a252a8f4dd8986ac411fb4af Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Fri, 9 Oct 2020 12:36:40 -0400
Subject: [PATCH 01/14] CI Check review_request_removed

---
 .github/workflows/unlabel.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 .github/workflows/unlabel.yml

diff --git a/.github/workflows/unlabel.yml b/.github/workflows/unlabel.yml
new file mode 100644
index 0000000000000..8c2d66db759ad
--- /dev/null
+++ b/.github/workflows/unlabel.yml
@@ -0,0 +1,14 @@
+name: Reviewed
+# Runs when a review is submitted to a PR and
+# remove the "Waiting for Reviewer" label
+on:
+  pull_request_target:
+    types: review_request_removed
+
+jobs:
+  one:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check event
+        run: |
+          echo "Did this run"

From 0fb69d43ddaa3325fb4a22cc6e674d756c207d71 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Wed, 14 Oct 2020 13:49:57 -0400
Subject: [PATCH 02/14] ENH Checks n_features_in_ in cluster module

---
 sklearn/cluster/_affinity_propagation.py  |  4 ++--
 sklearn/cluster/_birch.py                 |  9 ++++++---
 sklearn/cluster/_feature_agglomeration.py |  6 +-----
 sklearn/cluster/_kmeans.py                | 12 +++---------
 sklearn/cluster/_mean_shift.py            |  3 ++-
 sklearn/tests/test_common.py              |  1 -
 6 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 89ff3eb461b8d..2ec2c32b3a2a9 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -10,7 +10,7 @@
 
 from ..exceptions import ConvergenceWarning
 from ..base import BaseEstimator, ClusterMixin
-from ..utils import as_float_array, check_array, check_random_state
+from ..utils import as_float_array, check_random_state
 from ..utils.deprecation import deprecated
 from ..utils.validation import check_is_fitted, _deprecate_positional_args
 from ..metrics import euclidean_distances
@@ -446,7 +446,7 @@ def predict(self, X):
             Cluster labels.
         """
         check_is_fitted(self)
-        X = check_array(X)
+        X = self._validate_data(X, reset=False)
         if not hasattr(self, "cluster_centers_"):
             raise ValueError("Predict method is not supported when "
                              "affinity='precomputed'.")
diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py
index f90c47953f9e9..79905024e5408 100644
--- a/sklearn/cluster/_birch.py
+++ b/sklearn/cluster/_birch.py
@@ -12,7 +12,6 @@
 from ..metrics import pairwise_distances_argmin
 from ..metrics.pairwise import euclidean_distances
 from ..base import TransformerMixin, ClusterMixin, BaseEstimator
-from ..utils import check_array
 from ..utils.extmath import row_norms
 from ..utils.validation import check_is_fitted, _deprecate_positional_args
 from ..exceptions import ConvergenceWarning
@@ -585,8 +584,9 @@ def predict(self, X):
         labels : ndarray of shape(n_samples,)
             Labelled data.
         """
-        X = check_array(X, accept_sparse='csr')
-        self._check_fit(X)
+        check_is_fitted(self)
+        X = self._validate_data(X, accept_sparse='csr',
+                                reset=False)
         kwargs = {'Y_norm_squared': self._subcluster_norms}
         return self.subcluster_labels_[
                 pairwise_distances_argmin(X,
@@ -612,6 +612,9 @@ def transform(self, X):
             Transformed data.
         """
         check_is_fitted(self)
+        self._validate_data(X, accept_sparse='csr', reset=False)
+        # XXX: input data validation is performed again in
+        # euclidean_distances.
         return euclidean_distances(X, self.subcluster_centers_)
 
     def _global_clustering(self, X=None):
diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index 1366971466f6a..e27a048366401 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -8,7 +8,6 @@
 import numpy as np
 
 from ..base import TransformerMixin
-from ..utils import check_array
 from ..utils.validation import check_is_fitted
 from scipy.sparse import issparse
 
@@ -38,10 +37,7 @@ def transform(self, X):
         """
         check_is_fitted(self)
 
-        X = check_array(X)
-        if len(self.labels_) != X.shape[1]:
-            raise ValueError("X has a different number of features than "
-                             "during fitting.")
+        X = self._validate_data(X, reset=False)
         if self.pooling_func == np.mean and not issparse(X):
             size = np.bincount(self.labels_)
             n_samples = X.shape[0]
diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py
index 69901236d73b8..ef4ee7480fefb 100644
--- a/sklearn/cluster/_kmeans.py
+++ b/sklearn/cluster/_kmeans.py
@@ -852,15 +852,9 @@ def _validate_center_shape(self, X, centers):
                 f"match the number of features of the data {X.shape[1]}.")
 
     def _check_test_data(self, X):
-        X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32],
-                        order='C', accept_large_sparse=False)
-        n_samples, n_features = X.shape
-        expected_n_features = self.cluster_centers_.shape[1]
-        if not n_features == expected_n_features:
-            raise ValueError(
-                f"Incorrect number of features. Got {n_features} features, "
-                f"expected {expected_n_features}.")
-
+        X = self._validate_data(X, accept_sparse='csr', reset=False,
+                                dtype=[np.float64, np.float32],
+                                order='C', accept_large_sparse=False)
         return X
 
     def _init_centroids(self, X, x_squared_norms, init, random_state,
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py
index 777d3b1832291..b06a59e897b0e 100644
--- a/sklearn/cluster/_mean_shift.py
+++ b/sklearn/cluster/_mean_shift.py
@@ -462,5 +462,6 @@ def predict(self, X):
             Index of the cluster each sample belongs to.
         """
         check_is_fitted(self)
-
+        X = self._validate_data(X, reset=False)
+        # TODO: pairwise_distances_argmin also validates
         return pairwise_distances_argmin(X, self.cluster_centers_)
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index b84b66d1fb919..ff6735bcadd72 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -285,7 +285,6 @@ def test_strict_mode_parametrize_with_checks(estimator, check):
 # check_classifiers_train would need to be updated with the error message
 N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = {
     'calibration',
-    'cluster',
     'compose',
     'covariance',
     'cross_decomposition',

From 357f268ea13963488bd7ed15932c71b56b869af0 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Tue, 27 Oct 2020 22:44:02 -0400
Subject: [PATCH 03/14] CI Fixes CI sync

---
 .github/workflows/sync_pull_request.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/sync_pull_request.yml b/.github/workflows/sync_pull_request.yml
index b2b14709dd2ba..9a2da9a86aebd 100644
--- a/.github/workflows/sync_pull_request.yml
+++ b/.github/workflows/sync_pull_request.yml
@@ -22,7 +22,7 @@ jobs:
           set -xe
           git remote add pr_remote ${{ github.event.pull_request.head.repo.html_url }}
           git fetch pr_remote ${{ github.event.pull_request.head.ref }}
-          git checkout pr_remote/${{ github.event.pull_request.head.ref }}
+          git checkout -b pr_branch pr_remote/${{ github.event.pull_request.head.ref }}
           git config user.name github-actions
           git config user.email github-actions@github.com
           git merge origin/master

From ce54d6cd554cb26cee9ce0bee516b9f9402faf5f Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Sun, 1 Nov 2020 21:58:14 -0500
Subject: [PATCH 04/14] REV Reduces diff

---
 .github/workflows/unlabel.yml | 14 --------------
 1 file changed, 14 deletions(-)
 delete mode 100644 .github/workflows/unlabel.yml

diff --git a/.github/workflows/unlabel.yml b/.github/workflows/unlabel.yml
deleted file mode 100644
index 8c2d66db759ad..0000000000000
--- a/.github/workflows/unlabel.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: Reviewed
-# Runs when a review is submitted to a PR and
-# remove the "Waiting for Reviewer" label
-on:
-  pull_request_target:
-    types: review_request_removed
-
-jobs:
-  one:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check event
-        run: |
-          echo "Did this run"

From 5596764cddcae5c4a9e534570f80afbca798894b Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Mon, 2 Nov 2020 11:30:54 -0500
Subject: [PATCH 05/14] STY Formatting

---
 sklearn/cluster/_birch.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py
index 79905024e5408..f8e7a9eaec3aa 100644
--- a/sklearn/cluster/_birch.py
+++ b/sklearn/cluster/_birch.py
@@ -585,8 +585,7 @@ def predict(self, X):
             Labelled data.
         """
         check_is_fitted(self)
-        X = self._validate_data(X, accept_sparse='csr',
-                                reset=False)
+        X = self._validate_data(X, accept_sparse='csr', reset=False)
         kwargs = {'Y_norm_squared': self._subcluster_norms}
         return self.subcluster_labels_[
                 pairwise_distances_argmin(X,

From 86bd36c207089a9bbb6fa5ccce4dc6efea70d248 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Mon, 2 Nov 2020 12:26:28 -0500
Subject: [PATCH 06/14] ENH Uses context manager to avoid finite check

---
 sklearn/cluster/_affinity_propagation.py |  4 +++-
 sklearn/cluster/_birch.py                | 16 ++++++++--------
 sklearn/cluster/_mean_shift.py           |  5 +++--
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 2ec2c32b3a2a9..9937962095895 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -15,6 +15,7 @@
 from ..utils.validation import check_is_fitted, _deprecate_positional_args
 from ..metrics import euclidean_distances
 from ..metrics import pairwise_distances_argmin
+from .._config import config_context
 
 
 def _equal_similarities_and_preferences(S, preference):
@@ -452,7 +453,8 @@ def predict(self, X):
                              "affinity='precomputed'.")
 
         if self.cluster_centers_.shape[0] > 0:
-            return pairwise_distances_argmin(X, self.cluster_centers_)
+            with config_context(assume_finite=True):
+                return pairwise_distances_argmin(X, self.cluster_centers_)
         else:
             warnings.warn("This model does not have any cluster centers "
                           "because affinity propagation did not converge. "
diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py
index f8e7a9eaec3aa..3c1b03e6b958d 100644
--- a/sklearn/cluster/_birch.py
+++ b/sklearn/cluster/_birch.py
@@ -16,6 +16,7 @@
 from ..utils.validation import check_is_fitted, _deprecate_positional_args
 from ..exceptions import ConvergenceWarning
 from . import AgglomerativeClustering
+from .._config import config_context
 
 
 def _iterate_sparse_X(X):
@@ -587,11 +588,11 @@ def predict(self, X):
         check_is_fitted(self)
         X = self._validate_data(X, accept_sparse='csr', reset=False)
         kwargs = {'Y_norm_squared': self._subcluster_norms}
-        return self.subcluster_labels_[
-                pairwise_distances_argmin(X,
-                                          self.subcluster_centers_,
-                                          metric_kwargs=kwargs)
-            ]
+
+        with config_context(assume_finite=True):
+            argmin = pairwise_distances_argmin(X, self.subcluster_centers_,
+                                               metric_kwargs=kwargs)
+        return self.subcluster_labels_[argmin]
 
     def transform(self, X):
         """
@@ -612,9 +613,8 @@ def transform(self, X):
         """
         check_is_fitted(self)
         self._validate_data(X, accept_sparse='csr', reset=False)
-        # XXX: input data validation is performed again in
-        # euclidean_distances.
-        return euclidean_distances(X, self.subcluster_centers_)
+        with config_context(assume_finite=True):
+            return euclidean_distances(X, self.subcluster_centers_)
 
     def _global_clustering(self, X=None):
         """
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py
index b06a59e897b0e..fa62d2c8d9fe7 100644
--- a/sklearn/cluster/_mean_shift.py
+++ b/sklearn/cluster/_mean_shift.py
@@ -25,6 +25,7 @@
 from ..base import BaseEstimator, ClusterMixin
 from ..neighbors import NearestNeighbors
 from ..metrics.pairwise import pairwise_distances_argmin
+from .._config import config_context
 
 
 @_deprecate_positional_args
@@ -463,5 +464,5 @@ def predict(self, X):
         """
         check_is_fitted(self)
         X = self._validate_data(X, reset=False)
-        # TODO: pairwise_distances_argmin also validates
-        return pairwise_distances_argmin(X, self.cluster_centers_)
+        with config_context(assume_finite=True):
+            return pairwise_distances_argmin(X, self.cluster_centers_)

From c53c97d55f5bfc17cd93131f5aaac6c4067181dc Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Mon, 2 Nov 2020 14:13:27 -0500
Subject: [PATCH 07/14] ENH Adds n_features_in_ checking in cross_decomposition

---
 sklearn/cross_decomposition/_pls.py | 7 ++++---
 sklearn/tests/test_common.py        | 1 -
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py
index 9d8df42bf1a46..881d49e9a0c4e 100644
--- a/sklearn/cross_decomposition/_pls.py
+++ b/sklearn/cross_decomposition/_pls.py
@@ -316,7 +316,8 @@ def transform(self, X, Y=None, copy=True):
         `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.
         """
         check_is_fitted(self)
-        X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
+        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES,
+                                reset=False)
         # Normalize
         X -= self.x_mean_
         X /= self.x_std_
@@ -378,7 +379,7 @@ def predict(self, X, copy=True):
         space.
         """
         check_is_fitted(self)
-        X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
+        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
         # Normalize
         X -= self.x_mean_
         X /= self.x_std_
@@ -925,7 +926,7 @@ def transform(self, X, Y=None):
             `(X_transformed, Y_transformed)` otherwise.
         """
         check_is_fitted(self)
-        X = check_array(X, dtype=np.float64)
+        X = self._validate_data(X, dtype=np.float64, reset=False)
         Xr = (X - self.x_mean_) / self.x_std_
         x_scores = np.dot(Xr, self.x_weights_)
         if Y is not None:
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 127595b95e900..b2fff25c79b6e 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -338,7 +338,6 @@ def test_search_cv(estimator, check, request):
     'calibration',
     'compose',
     'covariance',
-    'cross_decomposition',
     'discriminant_analysis',
     'ensemble',
     'feature_extraction',

From 50cfbba4f608f50d0f02f6bf3b412490e2909a72 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Mon, 2 Nov 2020 19:07:50 -0500
Subject: [PATCH 08/14] REV Reduces diff

---
 sklearn/cluster/_affinity_propagation.py  |  8 +++-----
 sklearn/cluster/_birch.py                 | 20 +++++++++-----------
 sklearn/cluster/_feature_agglomeration.py |  6 +++++-
 sklearn/cluster/_kmeans.py                | 12 +++++++++---
 sklearn/cluster/_mean_shift.py            |  6 ++----
 sklearn/tests/test_common.py              |  1 +
 6 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 9937962095895..89ff3eb461b8d 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -10,12 +10,11 @@
 
 from ..exceptions import ConvergenceWarning
 from ..base import BaseEstimator, ClusterMixin
-from ..utils import as_float_array, check_random_state
+from ..utils import as_float_array, check_array, check_random_state
 from ..utils.deprecation import deprecated
 from ..utils.validation import check_is_fitted, _deprecate_positional_args
 from ..metrics import euclidean_distances
 from ..metrics import pairwise_distances_argmin
-from .._config import config_context
 
 
 def _equal_similarities_and_preferences(S, preference):
@@ -447,14 +446,13 @@ def predict(self, X):
             Cluster labels.
         """
         check_is_fitted(self)
-        X = self._validate_data(X, reset=False)
+        X = check_array(X)
         if not hasattr(self, "cluster_centers_"):
             raise ValueError("Predict method is not supported when "
                              "affinity='precomputed'.")
 
         if self.cluster_centers_.shape[0] > 0:
-            with config_context(assume_finite=True):
-                return pairwise_distances_argmin(X, self.cluster_centers_)
+            return pairwise_distances_argmin(X, self.cluster_centers_)
         else:
             warnings.warn("This model does not have any cluster centers "
                           "because affinity propagation did not converge. "
diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py
index 3c1b03e6b958d..f90c47953f9e9 100644
--- a/sklearn/cluster/_birch.py
+++ b/sklearn/cluster/_birch.py
@@ -12,11 +12,11 @@
 from ..metrics import pairwise_distances_argmin
 from ..metrics.pairwise import euclidean_distances
 from ..base import TransformerMixin, ClusterMixin, BaseEstimator
+from ..utils import check_array
 from ..utils.extmath import row_norms
 from ..utils.validation import check_is_fitted, _deprecate_positional_args
 from ..exceptions import ConvergenceWarning
 from . import AgglomerativeClustering
-from .._config import config_context
 
 
 def _iterate_sparse_X(X):
@@ -585,14 +585,14 @@ def predict(self, X):
         labels : ndarray of shape(n_samples,)
             Labelled data.
         """
-        check_is_fitted(self)
-        X = self._validate_data(X, accept_sparse='csr', reset=False)
+        X = check_array(X, accept_sparse='csr')
+        self._check_fit(X)
         kwargs = {'Y_norm_squared': self._subcluster_norms}
-
-        with config_context(assume_finite=True):
-            argmin = pairwise_distances_argmin(X, self.subcluster_centers_,
-                                               metric_kwargs=kwargs)
-        return self.subcluster_labels_[argmin]
+        return self.subcluster_labels_[
+                pairwise_distances_argmin(X,
+                                          self.subcluster_centers_,
+                                          metric_kwargs=kwargs)
+            ]
 
     def transform(self, X):
         """
@@ -612,9 +612,7 @@ def transform(self, X):
             Transformed data.
         """
         check_is_fitted(self)
-        self._validate_data(X, accept_sparse='csr', reset=False)
-        with config_context(assume_finite=True):
-            return euclidean_distances(X, self.subcluster_centers_)
+        return euclidean_distances(X, self.subcluster_centers_)
 
     def _global_clustering(self, X=None):
         """
diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py
index e27a048366401..1366971466f6a 100644
--- a/sklearn/cluster/_feature_agglomeration.py
+++ b/sklearn/cluster/_feature_agglomeration.py
@@ -8,6 +8,7 @@
 import numpy as np
 
 from ..base import TransformerMixin
+from ..utils import check_array
 from ..utils.validation import check_is_fitted
 from scipy.sparse import issparse
 
@@ -37,7 +38,10 @@ def transform(self, X):
         """
         check_is_fitted(self)
 
-        X = self._validate_data(X, reset=False)
+        X = check_array(X)
+        if len(self.labels_) != X.shape[1]:
+            raise ValueError("X has a different number of features than "
+                             "during fitting.")
         if self.pooling_func == np.mean and not issparse(X):
             size = np.bincount(self.labels_)
             n_samples = X.shape[0]
diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py
index 7e9891241a518..21b4ef9e06ead 100644
--- a/sklearn/cluster/_kmeans.py
+++ b/sklearn/cluster/_kmeans.py
@@ -854,9 +854,15 @@ def _validate_center_shape(self, X, centers):
                 f"match the number of features of the data {X.shape[1]}.")
 
     def _check_test_data(self, X):
-        X = self._validate_data(X, accept_sparse='csr', reset=False,
-                                dtype=[np.float64, np.float32],
-                                order='C', accept_large_sparse=False)
+        X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32],
+                        order='C', accept_large_sparse=False)
+        n_samples, n_features = X.shape
+        expected_n_features = self.cluster_centers_.shape[1]
+        if not n_features == expected_n_features:
+            raise ValueError(
+                f"Incorrect number of features. Got {n_features} features, "
+                f"expected {expected_n_features}.")
+
         return X
 
     def _check_mkl_vcomp(self, X, n_samples):
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py
index fa62d2c8d9fe7..777d3b1832291 100644
--- a/sklearn/cluster/_mean_shift.py
+++ b/sklearn/cluster/_mean_shift.py
@@ -25,7 +25,6 @@
 from ..base import BaseEstimator, ClusterMixin
 from ..neighbors import NearestNeighbors
 from ..metrics.pairwise import pairwise_distances_argmin
-from .._config import config_context
 
 
 @_deprecate_positional_args
@@ -463,6 +462,5 @@ def predict(self, X):
             Index of the cluster each sample belongs to.
         """
         check_is_fitted(self)
-        X = self._validate_data(X, reset=False)
-        with config_context(assume_finite=True):
-            return pairwise_distances_argmin(X, self.cluster_centers_)
+
+        return pairwise_distances_argmin(X, self.cluster_centers_)
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 9206e5bcfa1b4..90736840a7881 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -336,6 +336,7 @@ def test_search_cv(estimator, check, request):
 # check_classifiers_train would need to be updated with the error message
 N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = {
     'calibration',
+    'cluster',
     'compose',
     'covariance',
     'discriminant_analysis',

From 2461bec2253023a100f83c6a6d5d4a1c0ac14ffb Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Mon, 2 Nov 2020 19:31:11 -0500
Subject: [PATCH 09/14] CLN Less diff

---
 sklearn/cross_decomposition/_pls.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py
index 881d49e9a0c4e..ada69e070a8f0 100644
--- a/sklearn/cross_decomposition/_pls.py
+++ b/sklearn/cross_decomposition/_pls.py
@@ -316,8 +316,7 @@ def transform(self, X, Y=None, copy=True):
         `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.
         """
         check_is_fitted(self)
-        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES,
-                                reset=False)
+        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
         # Normalize
         X -= self.x_mean_
         X /= self.x_std_

From 2e9057a614db4cb47bd792672741b604ef44ebc3 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Wed, 4 Nov 2020 12:37:03 -0500
Subject: [PATCH 10/14] TST Expands dimensions

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3cd19967ba9c1..082d42648204c 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3158,7 +3158,7 @@ def check_n_features_in_after_fitting(name, estimator_orig, strict_mode=True):
         estimator.set_params(warm_start=False)
 
     n_samples = 100
-    X = rng.normal(loc=100, size=(n_samples, 2))
+    X = rng.normal(loc=100, size=(n_samples, 4))
     X = _pairwise_estimator_convert_X(X, estimator)
     if is_regressor(estimator):
         y = rng.normal(size=n_samples)

From 5bce61848393d72f31027bb0b5c4d10a61c9d470 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Fri, 4 Dec 2020 16:28:04 -0500
Subject: [PATCH 11/14] TST Bigger feature matrix

---
 sklearn/utils/estimator_checks.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index b83989a76be09..0fe1d5882a300 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -63,7 +63,7 @@
     load_iris,
     make_blobs,
     make_multilabel_classification,
-    make_regression,
+    make_regression
 )
 
 REGRESSION_DATASET = None
@@ -3119,8 +3119,8 @@ def check_n_features_in_after_fitting(name, estimator_orig):
     if 'warm_start' in estimator.get_params():
         estimator.set_params(warm_start=False)
 
-    n_samples = 100
-    X = rng.normal(loc=100, size=(n_samples, 4))
+    n_samples = 150
+    X = rng.normal(size=(n_samples, 8))
     X = _pairwise_estimator_convert_X(X, estimator)
     if is_regressor(estimator):
         y = rng.normal(size=n_samples)

From f4e508c79565d6c89de1acc457b9ad15da67bea4 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Mon, 25 Jan 2021 22:34:57 -0500
Subject: [PATCH 12/14] ENH Fixes test

---
 sklearn/utils/estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index df9b4240a23fb..c72a247c16cf3 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3124,6 +3124,7 @@ def check_n_features_in_after_fitting(name, estimator_orig):
 
     n_samples = 150
     X = rng.normal(size=(n_samples, 8))
+    X = _enforce_estimator_tags_x(estimator, X)
     X = _pairwise_estimator_convert_X(X, estimator)
     if is_regressor(estimator):
         y = rng.normal(size=n_samples)

From 7880f02a222f045aa1ef18430f3c7e74b3c16535 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Tue, 26 Jan 2021 20:41:54 -0500
Subject: [PATCH 13/14] ENH Adds fix for cross_decomposition

---
 sklearn/utils/estimator_checks.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index c72a247c16cf3..e82added8799f 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -3126,12 +3126,17 @@ def check_n_features_in_after_fitting(name, estimator_orig):
     X = rng.normal(size=(n_samples, 8))
     X = _enforce_estimator_tags_x(estimator, X)
     X = _pairwise_estimator_convert_X(X, estimator)
+
     if is_regressor(estimator):
         y = rng.normal(size=n_samples)
     else:
         y = rng.randint(low=0, high=2, size=n_samples)
     y = _enforce_estimator_tags_y(estimator, y)
 
+    if name in CROSS_DECOMPOSITION:
+        y = np.c_[y, y]
+        y[::2, 1] *= 2
+
     estimator.fit(X, y)
     assert estimator.n_features_in_ == X.shape[1]
 

From 5661b50c4552ade901dc2241d92bc1152196317c Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Fri, 29 Jan 2021 13:34:36 -0500
Subject: [PATCH 14/14] ENH Sets n_components for cross_decomposition

---
 sklearn/utils/estimator_checks.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index e82added8799f..e811c3c3679e9 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -646,6 +646,9 @@ def _set_checking_parameters(estimator):
     if name == 'OneHotEncoder':
         estimator.set_params(handle_unknown='ignore')
 
+    if name in CROSS_DECOMPOSITION:
+        estimator.set_params(n_components=1)
+
 
 class _NotAnArray:
     """An object that is convertible to an array.
@@ -3133,10 +3136,6 @@ def check_n_features_in_after_fitting(name, estimator_orig):
         y = rng.randint(low=0, high=2, size=n_samples)
     y = _enforce_estimator_tags_y(estimator, y)
 
-    if name in CROSS_DECOMPOSITION:
-        y = np.c_[y, y]
-        y[::2, 1] *= 2
-
     estimator.fit(X, y)
     assert estimator.n_features_in_ == X.shape[1]