From a501ca1118ba45f0a252a8f4dd8986ac411fb4af Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 9 Oct 2020 12:36:40 -0400 Subject: [PATCH 01/14] CI Check review_request_removed --- .github/workflows/unlabel.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/unlabel.yml diff --git a/.github/workflows/unlabel.yml b/.github/workflows/unlabel.yml new file mode 100644 index 0000000000000..8c2d66db759ad --- /dev/null +++ b/.github/workflows/unlabel.yml @@ -0,0 +1,14 @@ +name: Reviewed +# Runs when a review is submitted to a PR and +# remove the "Waiting for Reviewer" label +on: + pull_request_target: + types: review_request_removed + +jobs: + one: + runs-on: ubuntu-latest + steps: + - name: Check event + run: | + echo "Did this run" From 0fb69d43ddaa3325fb4a22cc6e674d756c207d71 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 14 Oct 2020 13:49:57 -0400 Subject: [PATCH 02/14] ENH Checks n_features_in_ in cluster module --- sklearn/cluster/_affinity_propagation.py | 4 ++-- sklearn/cluster/_birch.py | 9 ++++++--- sklearn/cluster/_feature_agglomeration.py | 6 +----- sklearn/cluster/_kmeans.py | 12 +++--------- sklearn/cluster/_mean_shift.py | 3 ++- sklearn/tests/test_common.py | 1 - 6 files changed, 14 insertions(+), 21 deletions(-) diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 89ff3eb461b8d..2ec2c32b3a2a9 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -10,7 +10,7 @@ from ..exceptions import ConvergenceWarning from ..base import BaseEstimator, ClusterMixin -from ..utils import as_float_array, check_array, check_random_state +from ..utils import as_float_array, check_random_state from ..utils.deprecation import deprecated from ..utils.validation import check_is_fitted, _deprecate_positional_args from ..metrics import euclidean_distances @@ -446,7 +446,7 @@ def predict(self, X): Cluster labels. 
""" check_is_fitted(self) - X = check_array(X) + X = self._validate_data(X, reset=False) if not hasattr(self, "cluster_centers_"): raise ValueError("Predict method is not supported when " "affinity='precomputed'.") diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index f90c47953f9e9..79905024e5408 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -12,7 +12,6 @@ from ..metrics import pairwise_distances_argmin from ..metrics.pairwise import euclidean_distances from ..base import TransformerMixin, ClusterMixin, BaseEstimator -from ..utils import check_array from ..utils.extmath import row_norms from ..utils.validation import check_is_fitted, _deprecate_positional_args from ..exceptions import ConvergenceWarning @@ -585,8 +584,9 @@ def predict(self, X): labels : ndarray of shape(n_samples,) Labelled data. """ - X = check_array(X, accept_sparse='csr') - self._check_fit(X) + check_is_fitted(self) + X = self._validate_data(X, accept_sparse='csr', + reset=False) kwargs = {'Y_norm_squared': self._subcluster_norms} return self.subcluster_labels_[ pairwise_distances_argmin(X, @@ -612,6 +612,9 @@ def transform(self, X): Transformed data. """ check_is_fitted(self) + self._validate_data(X, accept_sparse='csr', reset=False) + # XXX: input data validation is performed again in + # euclidean_distances. 
return euclidean_distances(X, self.subcluster_centers_) def _global_clustering(self, X=None): diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index 1366971466f6a..e27a048366401 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -8,7 +8,6 @@ import numpy as np from ..base import TransformerMixin -from ..utils import check_array from ..utils.validation import check_is_fitted from scipy.sparse import issparse @@ -38,10 +37,7 @@ def transform(self, X): """ check_is_fitted(self) - X = check_array(X) - if len(self.labels_) != X.shape[1]: - raise ValueError("X has a different number of features than " - "during fitting.") + X = self._validate_data(X, reset=False) if self.pooling_func == np.mean and not issparse(X): size = np.bincount(self.labels_) n_samples = X.shape[0] diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index 69901236d73b8..ef4ee7480fefb 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -852,15 +852,9 @@ def _validate_center_shape(self, X, centers): f"match the number of features of the data {X.shape[1]}.") def _check_test_data(self, X): - X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32], - order='C', accept_large_sparse=False) - n_samples, n_features = X.shape - expected_n_features = self.cluster_centers_.shape[1] - if not n_features == expected_n_features: - raise ValueError( - f"Incorrect number of features. 
Got {n_features} features, " - f"expected {expected_n_features}.") - + X = self._validate_data(X, accept_sparse='csr', reset=False, + dtype=[np.float64, np.float32], + order='C', accept_large_sparse=False) return X def _init_centroids(self, X, x_squared_norms, init, random_state, diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index 777d3b1832291..b06a59e897b0e 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -462,5 +462,6 @@ def predict(self, X): Index of the cluster each sample belongs to. """ check_is_fitted(self) - + X = self._validate_data(X, reset=False) + # TODO: pairwise_distances_argmin also validates return pairwise_distances_argmin(X, self.cluster_centers_) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index b84b66d1fb919..ff6735bcadd72 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -285,7 +285,6 @@ def test_strict_mode_parametrize_with_checks(estimator, check): # check_classifiers_train would need to be updated with the error message N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = { 'calibration', - 'cluster', 'compose', 'covariance', 'cross_decomposition', From 357f268ea13963488bd7ed15932c71b56b869af0 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Tue, 27 Oct 2020 22:44:02 -0400 Subject: [PATCH 03/14] CI Fixes CI sync --- .github/workflows/sync_pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync_pull_request.yml b/.github/workflows/sync_pull_request.yml index b2b14709dd2ba..9a2da9a86aebd 100644 --- a/.github/workflows/sync_pull_request.yml +++ b/.github/workflows/sync_pull_request.yml @@ -22,7 +22,7 @@ jobs: set -xe git remote add pr_remote ${{ github.event.pull_request.head.repo.html_url }} git fetch pr_remote ${{ github.event.pull_request.head.ref }} - git checkout pr_remote/${{ github.event.pull_request.head.ref }} + git checkout -b pr_branch pr_remote/${{ github.event.pull_request.head.ref }} git config user.name github-actions git config user.email github-actions@github.com git merge origin/master From ce54d6cd554cb26cee9ce0bee516b9f9402faf5f Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Sun, 1 Nov 2020 21:58:14 -0500 Subject: [PATCH 04/14] REV Reduces diff --- .github/workflows/unlabel.yml | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 .github/workflows/unlabel.yml diff --git a/.github/workflows/unlabel.yml b/.github/workflows/unlabel.yml deleted file mode 100644 index 8c2d66db759ad..0000000000000 --- a/.github/workflows/unlabel.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: Reviewed -# Runs when a review is submitted to a PR and -# remove the "Waiting for Reviewer" label -on: - pull_request_target: - types: review_request_removed - -jobs: - one: - runs-on: ubuntu-latest - steps: - - name: Check event - run: | - echo "Did this run" From 5596764cddcae5c4a9e534570f80afbca798894b Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Mon, 2 Nov 2020 11:30:54 -0500 Subject: [PATCH 05/14] STY Formatting --- sklearn/cluster/_birch.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 79905024e5408..f8e7a9eaec3aa 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -585,8 +585,7 @@ def predict(self, X): Labelled data. """ check_is_fitted(self) - X = self._validate_data(X, accept_sparse='csr', - reset=False) + X = self._validate_data(X, accept_sparse='csr', reset=False) kwargs = {'Y_norm_squared': self._subcluster_norms} return self.subcluster_labels_[ pairwise_distances_argmin(X, From 86bd36c207089a9bbb6fa5ccce4dc6efea70d248 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 2 Nov 2020 12:26:28 -0500 Subject: [PATCH 06/14] ENH Uses context manager to avoid finite check --- sklearn/cluster/_affinity_propagation.py | 4 +++- sklearn/cluster/_birch.py | 16 ++++++++-------- sklearn/cluster/_mean_shift.py | 5 +++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 2ec2c32b3a2a9..9937962095895 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -15,6 +15,7 @@ from ..utils.validation import check_is_fitted, _deprecate_positional_args from ..metrics import euclidean_distances from ..metrics import pairwise_distances_argmin +from .._config import config_context def _equal_similarities_and_preferences(S, preference): @@ -452,7 +453,8 @@ def predict(self, X): "affinity='precomputed'.") if self.cluster_centers_.shape[0] > 0: - return pairwise_distances_argmin(X, self.cluster_centers_) + with config_context(assume_finite=True): + return pairwise_distances_argmin(X, self.cluster_centers_) else: warnings.warn("This model does not have any cluster centers " "because affinity propagation did not converge. 
" diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index f8e7a9eaec3aa..3c1b03e6b958d 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -16,6 +16,7 @@ from ..utils.validation import check_is_fitted, _deprecate_positional_args from ..exceptions import ConvergenceWarning from . import AgglomerativeClustering +from .._config import config_context def _iterate_sparse_X(X): @@ -587,11 +588,11 @@ def predict(self, X): check_is_fitted(self) X = self._validate_data(X, accept_sparse='csr', reset=False) kwargs = {'Y_norm_squared': self._subcluster_norms} - return self.subcluster_labels_[ - pairwise_distances_argmin(X, - self.subcluster_centers_, - metric_kwargs=kwargs) - ] + + with config_context(assume_finite=True): + argmin = pairwise_distances_argmin(X, self.subcluster_centers_, + metric_kwargs=kwargs) + return self.subcluster_labels_[argmin] def transform(self, X): """ @@ -612,9 +613,8 @@ def transform(self, X): """ check_is_fitted(self) self._validate_data(X, accept_sparse='csr', reset=False) - # XXX: input data validation is performed again in - # euclidean_distances. 
- return euclidean_distances(X, self.subcluster_centers_) + with config_context(assume_finite=True): + return euclidean_distances(X, self.subcluster_centers_) def _global_clustering(self, X=None): """ diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index b06a59e897b0e..fa62d2c8d9fe7 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -25,6 +25,7 @@ from ..base import BaseEstimator, ClusterMixin from ..neighbors import NearestNeighbors from ..metrics.pairwise import pairwise_distances_argmin +from .._config import config_context @_deprecate_positional_args @@ -463,5 +464,5 @@ def predict(self, X): """ check_is_fitted(self) X = self._validate_data(X, reset=False) - # TODO: pairwise_distances_argmin also validates - return pairwise_distances_argmin(X, self.cluster_centers_) + with config_context(assume_finite=True): + return pairwise_distances_argmin(X, self.cluster_centers_) From c53c97d55f5bfc17cd93131f5aaac6c4067181dc Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 2 Nov 2020 14:13:27 -0500 Subject: [PATCH 07/14] ENH Adds n_features_in_ checking in cross_decomposition --- sklearn/cross_decomposition/_pls.py | 7 ++++--- sklearn/tests/test_common.py | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index 9d8df42bf1a46..881d49e9a0c4e 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -316,7 +316,8 @@ def transform(self, X, Y=None, copy=True): `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise. """ check_is_fitted(self) - X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) + X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, + reset=False) # Normalize X -= self.x_mean_ X /= self.x_std_ @@ -378,7 +379,7 @@ def predict(self, X, copy=True): space. 
""" check_is_fitted(self) - X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) + X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False) # Normalize X -= self.x_mean_ X /= self.x_std_ @@ -925,7 +926,7 @@ def transform(self, X, Y=None): `(X_transformed, Y_transformed)` otherwise. """ check_is_fitted(self) - X = check_array(X, dtype=np.float64) + X = self._validate_data(X, dtype=np.float64, reset=False) Xr = (X - self.x_mean_) / self.x_std_ x_scores = np.dot(Xr, self.x_weights_) if Y is not None: diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 127595b95e900..b2fff25c79b6e 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -338,7 +338,6 @@ def test_search_cv(estimator, check, request): 'calibration', 'compose', 'covariance', - 'cross_decomposition', 'discriminant_analysis', 'ensemble', 'feature_extraction', From 50cfbba4f608f50d0f02f6bf3b412490e2909a72 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 2 Nov 2020 19:07:50 -0500 Subject: [PATCH 08/14] REV Reduces diff --- sklearn/cluster/_affinity_propagation.py | 8 +++----- sklearn/cluster/_birch.py | 20 +++++++++----------- sklearn/cluster/_feature_agglomeration.py | 6 +++++- sklearn/cluster/_kmeans.py | 12 +++++++++--- sklearn/cluster/_mean_shift.py | 6 ++---- sklearn/tests/test_common.py | 1 + 6 files changed, 29 insertions(+), 24 deletions(-) diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 9937962095895..89ff3eb461b8d 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -10,12 +10,11 @@ from ..exceptions import ConvergenceWarning from ..base import BaseEstimator, ClusterMixin -from ..utils import as_float_array, check_random_state +from ..utils import as_float_array, check_array, check_random_state from ..utils.deprecation import deprecated from ..utils.validation import check_is_fitted, _deprecate_positional_args from ..metrics 
import euclidean_distances from ..metrics import pairwise_distances_argmin -from .._config import config_context def _equal_similarities_and_preferences(S, preference): @@ -447,14 +446,13 @@ def predict(self, X): Cluster labels. """ check_is_fitted(self) - X = self._validate_data(X, reset=False) + X = check_array(X) if not hasattr(self, "cluster_centers_"): raise ValueError("Predict method is not supported when " "affinity='precomputed'.") if self.cluster_centers_.shape[0] > 0: - with config_context(assume_finite=True): - return pairwise_distances_argmin(X, self.cluster_centers_) + return pairwise_distances_argmin(X, self.cluster_centers_) else: warnings.warn("This model does not have any cluster centers " "because affinity propagation did not converge. " diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 3c1b03e6b958d..f90c47953f9e9 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -12,11 +12,11 @@ from ..metrics import pairwise_distances_argmin from ..metrics.pairwise import euclidean_distances from ..base import TransformerMixin, ClusterMixin, BaseEstimator +from ..utils import check_array from ..utils.extmath import row_norms from ..utils.validation import check_is_fitted, _deprecate_positional_args from ..exceptions import ConvergenceWarning from . import AgglomerativeClustering -from .._config import config_context def _iterate_sparse_X(X): @@ -585,14 +585,14 @@ def predict(self, X): labels : ndarray of shape(n_samples,) Labelled data. 
""" - check_is_fitted(self) - X = self._validate_data(X, accept_sparse='csr', reset=False) + X = check_array(X, accept_sparse='csr') + self._check_fit(X) kwargs = {'Y_norm_squared': self._subcluster_norms} - - with config_context(assume_finite=True): - argmin = pairwise_distances_argmin(X, self.subcluster_centers_, - metric_kwargs=kwargs) - return self.subcluster_labels_[argmin] + return self.subcluster_labels_[ + pairwise_distances_argmin(X, + self.subcluster_centers_, + metric_kwargs=kwargs) + ] def transform(self, X): """ @@ -612,9 +612,7 @@ def transform(self, X): Transformed data. """ check_is_fitted(self) - self._validate_data(X, accept_sparse='csr', reset=False) - with config_context(assume_finite=True): - return euclidean_distances(X, self.subcluster_centers_) + return euclidean_distances(X, self.subcluster_centers_) def _global_clustering(self, X=None): """ diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index e27a048366401..1366971466f6a 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -8,6 +8,7 @@ import numpy as np from ..base import TransformerMixin +from ..utils import check_array from ..utils.validation import check_is_fitted from scipy.sparse import issparse @@ -37,7 +38,10 @@ def transform(self, X): """ check_is_fitted(self) - X = self._validate_data(X, reset=False) + X = check_array(X) + if len(self.labels_) != X.shape[1]: + raise ValueError("X has a different number of features than " + "during fitting.") if self.pooling_func == np.mean and not issparse(X): size = np.bincount(self.labels_) n_samples = X.shape[0] diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index 7e9891241a518..21b4ef9e06ead 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -854,9 +854,15 @@ def _validate_center_shape(self, X, centers): f"match the number of features of the data {X.shape[1]}.") def _check_test_data(self, X): 
- X = self._validate_data(X, accept_sparse='csr', reset=False, - dtype=[np.float64, np.float32], - order='C', accept_large_sparse=False) + X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32], + order='C', accept_large_sparse=False) + n_samples, n_features = X.shape + expected_n_features = self.cluster_centers_.shape[1] + if not n_features == expected_n_features: + raise ValueError( + f"Incorrect number of features. Got {n_features} features, " + f"expected {expected_n_features}.") + return X def _check_mkl_vcomp(self, X, n_samples): diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index fa62d2c8d9fe7..777d3b1832291 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -25,7 +25,6 @@ from ..base import BaseEstimator, ClusterMixin from ..neighbors import NearestNeighbors from ..metrics.pairwise import pairwise_distances_argmin -from .._config import config_context @_deprecate_positional_args @@ -463,6 +462,5 @@ def predict(self, X): Index of the cluster each sample belongs to. """ check_is_fitted(self) - X = self._validate_data(X, reset=False) - with config_context(assume_finite=True): - return pairwise_distances_argmin(X, self.cluster_centers_) + + return pairwise_distances_argmin(X, self.cluster_centers_) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 9206e5bcfa1b4..90736840a7881 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -336,6 +336,7 @@ def test_search_cv(estimator, check, request): # check_classifiers_train would need to be updated with the error message N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = { 'calibration', + 'cluster', 'compose', 'covariance', 'discriminant_analysis', From 2461bec2253023a100f83c6a6d5d4a1c0ac14ffb Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Mon, 2 Nov 2020 19:31:11 -0500 Subject: [PATCH 09/14] CLN Less diff --- sklearn/cross_decomposition/_pls.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index 881d49e9a0c4e..ada69e070a8f0 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -316,8 +316,7 @@ def transform(self, X, Y=None, copy=True): `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise. """ check_is_fitted(self) - X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, - reset=False) + X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False) # Normalize X -= self.x_mean_ X /= self.x_std_ From 2e9057a614db4cb47bd792672741b604ef44ebc3 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 4 Nov 2020 12:37:03 -0500 Subject: [PATCH 10/14] TST Expands dimensions --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 3cd19967ba9c1..082d42648204c 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3158,7 +3158,7 @@ def check_n_features_in_after_fitting(name, estimator_orig, strict_mode=True): estimator.set_params(warm_start=False) n_samples = 100 - X = rng.normal(loc=100, size=(n_samples, 2)) + X = rng.normal(loc=100, size=(n_samples, 4)) X = _pairwise_estimator_convert_X(X, estimator) if is_regressor(estimator): y = rng.normal(size=n_samples) From 5bce61848393d72f31027bb0b5c4d10a61c9d470 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Fri, 4 Dec 2020 16:28:04 -0500 Subject: [PATCH 11/14] TST Bigger feature matrix --- sklearn/utils/estimator_checks.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index b83989a76be09..0fe1d5882a300 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -63,7 +63,7 @@ load_iris, make_blobs, make_multilabel_classification, - make_regression, + make_regression ) REGRESSION_DATASET = None @@ -3119,8 +3119,8 @@ def check_n_features_in_after_fitting(name, estimator_orig): if 'warm_start' in estimator.get_params(): estimator.set_params(warm_start=False) - n_samples = 100 - X = rng.normal(loc=100, size=(n_samples, 4)) + n_samples = 150 + X = rng.normal(size=(n_samples, 8)) X = _pairwise_estimator_convert_X(X, estimator) if is_regressor(estimator): y = rng.normal(size=n_samples) From f4e508c79565d6c89de1acc457b9ad15da67bea4 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 25 Jan 2021 22:34:57 -0500 Subject: [PATCH 12/14] ENH Fixes test --- sklearn/utils/estimator_checks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index df9b4240a23fb..c72a247c16cf3 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3124,6 +3124,7 @@ def check_n_features_in_after_fitting(name, estimator_orig): n_samples = 150 X = rng.normal(size=(n_samples, 8)) + X = _enforce_estimator_tags_x(estimator, X) X = _pairwise_estimator_convert_X(X, estimator) if is_regressor(estimator): y = rng.normal(size=n_samples) From 7880f02a222f045aa1ef18430f3c7e74b3c16535 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Tue, 26 Jan 2021 20:41:54 -0500 Subject: [PATCH 13/14] ENH Adds fix for cross_decomposition --- sklearn/utils/estimator_checks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index c72a247c16cf3..e82added8799f 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3126,12 +3126,17 @@ def check_n_features_in_after_fitting(name, estimator_orig): X = rng.normal(size=(n_samples, 8)) X = _enforce_estimator_tags_x(estimator, X) X = _pairwise_estimator_convert_X(X, estimator) + if is_regressor(estimator): y = rng.normal(size=n_samples) else: y = rng.randint(low=0, high=2, size=n_samples) y = _enforce_estimator_tags_y(estimator, y) + if name in CROSS_DECOMPOSITION: + y = np.c_[y, y] + y[::2, 1] *= 2 + estimator.fit(X, y) assert estimator.n_features_in_ == X.shape[1] From 5661b50c4552ade901dc2241d92bc1152196317c Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 29 Jan 2021 13:34:36 -0500 Subject: [PATCH 14/14] ENH Sets n_components for cross_decomposition --- sklearn/utils/estimator_checks.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index e82added8799f..e811c3c3679e9 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -646,6 +646,9 @@ def _set_checking_parameters(estimator): if name == 'OneHotEncoder': estimator.set_params(handle_unknown='ignore') + if name in CROSS_DECOMPOSITION: + estimator.set_params(n_components=1) + class _NotAnArray: """An object that is convertible to an array. @@ -3133,10 +3136,6 @@ def check_n_features_in_after_fitting(name, estimator_orig): y = rng.randint(low=0, high=2, size=n_samples) y = _enforce_estimator_tags_y(estimator, y) - if name in CROSS_DECOMPOSITION: - y = np.c_[y, y] - y[::2, 1] *= 2 - estimator.fit(X, y) assert estimator.n_features_in_ == X.shape[1]