[MRG] simplify check_is_fitted to use any fitted attributes #14545

Merged: 23 commits from amueller's anything_fitted branch, Aug 13, 2019.
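In short: check_is_fitted no longer needs the caller to spell out which fitted attributes to look for. Per the commit messages below, the check now passes whenever the estimator carries any fitted attribute, i.e. one whose name ends in an underscore (private ones included), with a special case for classes_. A minimal before/after sketch, assuming the post-merge behavior the commits describe:

import numpy as np
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression
from sklearn.utils.validation import check_is_fitted

est = LinearRegression().fit(np.array([[0.0], [1.0]]), np.array([0.0, 1.0]))

# Old style: the caller listed the fitted attribute(s) explicitly.
check_is_fitted(est, 'coef_')

# New style: fit() set coef_ and intercept_, so the bare check passes.
check_is_fitted(est)

# An unfitted estimator still raises NotFittedError.
try:
    check_is_fitted(LinearRegression())
except NotFittedError as exc:
    print('not fitted:', exc)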
Commits:
18fba6c  make check_is_fitted not take attributes (amueller, Aug 1, 2019)
e034ed8  cleanup, remove any_or_all (amueller, Aug 1, 2019)
1dc9258  fix LOF, birch, mixtures (amueller, Aug 1, 2019)
d6034ea  remove unused method (amueller, Aug 1, 2019)
3cb95ac  fix partial dependence function (amueller, Aug 2, 2019)
4d3a8b4  make change backward-compatible (amueller, Aug 2, 2019)
1181982  also allow private fitted attributes (amueller, Aug 2, 2019)
7ed876d  slight refactoring in CountVectorizer to mess less with the vocabulary (amueller, Aug 2, 2019)
8701cc0  added regression test for not being able to call inverse_transform be… (amueller, Aug 2, 2019)
be4a90f  add special check for classes (amueller, Aug 2, 2019)
7e33027  more functions to fix (amueller, Aug 5, 2019)
09e4192  Update sklearn/utils/validation.py (amueller, Aug 6, 2019)
40af13e  fix whitespace, keyword args (amueller, Aug 6, 2019)
ed957e3  Merge branch 'anything_fitted' of github.com:amueller/scikit-learn in… (amueller, Aug 6, 2019)
86aebe7  remove extra blank line (amueller, Aug 8, 2019)
ec25b3c  fix CI hopefully (amueller, Aug 8, 2019)
9038c62  deprecate all_or_any in check_is_fittec (amueller, Aug 9, 2019)
9862529  fix typo, add test for deprecation (amueller, Aug 9, 2019)
da382b8  Merge branch 'master' into anything_fitted (amueller, Aug 12, 2019)
e958e62  add comment on 0.23 removal of deprecated arguments to check_is_fitted (amueller, Aug 12, 2019)
0538f91  Apply suggestions from code review (amueller, Aug 12, 2019)
11995c8  Update sklearn/utils/validation.py (amueller, Aug 12, 2019)
7463363  pep8 (amueller, Aug 13, 2019)
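Commits 4d3a8b4, 9038c62, and e958e62 indicate that the old attributes and all_or_any arguments remain accepted but deprecated, with removal planned for 0.23. A hedged migration sketch; the warning category and message here are assumptions, not quotes from the PR:

import warnings

import numpy as np
from sklearn.cluster import KMeans
from sklearn.utils.validation import check_is_fitted

km = KMeans(n_clusters=2).fit(np.array([[0.0], [1.0], [10.0], [11.0]]))

# Deprecated style: still works at merge time, but is expected to warn
# until the extra arguments are removed in 0.23.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    check_is_fitted(km, 'cluster_centers_', all_or_any=all)
print([str(w.message) for w in caught])

# New style: the estimator alone is enough.
check_is_fitted(km)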
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_nmf.py
@@ -213,13 +213,13 @@ def fit(self, X, y=None, **params):
return self

def transform(self, X):
-check_is_fitted(self, 'components_')
+check_is_fitted(self)
H = self.components_
W, _, self.n_iter_ = self._fit_transform(X, H=H, update_H=False)
return W

def inverse_transform(self, W):
-check_is_fitted(self, 'components_')
+check_is_fitted(self)
return np.dot(W, self.components_)

def fit_transform(self, X, y=None, W=None, H=None):
2 changes: 1 addition & 1 deletion doc/developers/contributing.rst
@@ -1381,7 +1381,7 @@ the correct interface more easily.
... def predict(self, X):
...
... # Check if fit had been called
-... check_is_fitted(self, ['X_', 'y_'])
+... check_is_fitted(self)
...
... # Input validation
... X = check_array(X)
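For reference, the estimator template the contributing guide recommends now reduces to something like the following. A minimal sketch assuming only that trailing-underscore attributes set in fit are what the new check keys on; the toy prediction rule is illustrative:

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y


class TemplateClassifier(BaseEstimator, ClassifierMixin):

    def fit(self, X, y):
        X, y = check_X_y(X, y)
        # Trailing-underscore attributes mark the estimator as fitted.
        self.X_ = X
        self.y_ = y
        return self

    def predict(self, X):
        # Check if fit has been called; no attribute list needed anymore.
        check_is_fitted(self)
        X = check_array(X)
        # Toy rule: always predict the first training label.
        return np.full(X.shape[0], self.y_[0])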
4 changes: 2 additions & 2 deletions sklearn/calibration.py
@@ -216,7 +216,7 @@ def predict_proba(self, X):
C : array, shape (n_samples, n_classes)
The predicted probabilities.
"""
check_is_fitted(self, ["classes_", "calibrated_classifiers_"])
check_is_fitted(self)
X = check_array(X, accept_sparse=['csc', 'csr', 'coo'],
force_all_finite=False)
# Compute the arithmetic mean of the predictions of the calibrated
@@ -244,7 +244,7 @@ def predict(self, X):
C : array, shape (n_samples,)
The predicted class.
"""
check_is_fitted(self, ["classes_", "calibrated_classifiers_"])
check_is_fitted(self)
return self.classes_[np.argmax(self.predict_proba(X), axis=1)]


4 changes: 2 additions & 2 deletions sklearn/cluster/_feature_agglomeration.py
@@ -36,7 +36,7 @@ def transform(self, X):
Y : array, shape = [n_samples, n_clusters] or [n_clusters]
The pooled values for each feature cluster.
"""
check_is_fitted(self, "labels_")
check_is_fitted(self)

X = check_array(X)
if len(self.labels_) != X.shape[1]:
@@ -71,7 +71,7 @@ def inverse_transform(self, Xred):
A vector of size n_samples with the values of Xred assigned to
each cluster of samples.
"""
check_is_fitted(self, "labels_")
check_is_fitted(self)

unil, inverse = np.unique(self.labels_, return_inverse=True)
return Xred[..., inverse]
2 changes: 1 addition & 1 deletion sklearn/cluster/affinity_propagation_.py
@@ -407,7 +407,7 @@ def predict(self, X):
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
check_is_fitted(self, "cluster_centers_indices_")
check_is_fitted(self)
if not hasattr(self, "cluster_centers_"):
raise ValueError("Predict method is not supported when "
"affinity='precomputed'.")
5 changes: 2 additions & 3 deletions sklearn/cluster/birch.py
@@ -534,8 +534,7 @@ def partial_fit(self, X=None, y=None):
return self._fit(X)

def _check_fit(self, X):
-check_is_fitted(self, ['subcluster_centers_', 'partial_fit_'],
-                all_or_any=any)
+check_is_fitted(self)

if (hasattr(self, 'subcluster_centers_') and
X.shape[1] != self.subcluster_centers_.shape[1]):
@@ -583,7 +582,7 @@ def transform(self, X):
X_trans : {array-like, sparse matrix}, shape (n_samples, n_clusters)
Transformed data.
"""
-check_is_fitted(self, 'subcluster_centers_')
+check_is_fitted(self)
return euclidean_distances(X, self.subcluster_centers_)

def _global_clustering(self, X=None):
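The Birch change above is the one call site that relied on all_or_any=any: _check_fit used to pass when either subcluster_centers_ or partial_fit_ was present. The generic check should be equivalent here, since both fit and partial_fit leave at least one trailing-underscore attribute behind. A small sketch of that assumed behavior (not taken from the PR's tests):

import numpy as np
from sklearn.cluster import Birch
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted

brc = Birch(n_clusters=None)
try:
    check_is_fitted(brc)  # nothing fitted yet
except NotFittedError:
    print('not fitted')

brc.partial_fit(np.array([[0.0], [1.0], [2.0]]))
check_is_fitted(brc)  # passes: partial_fit set fitted attributes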
8 changes: 4 additions & 4 deletions sklearn/cluster/k_means_.py
@@ -1033,7 +1033,7 @@ def transform(self, X):
X_new : array, shape [n_samples, k]
X transformed in the new space.
"""
-check_is_fitted(self, 'cluster_centers_')
+check_is_fitted(self)

X = self._check_test_data(X)
return self._transform(X)
@@ -1063,7 +1063,7 @@ def predict(self, X, sample_weight=None):
labels : array, shape [n_samples,]
Index of the cluster each sample belongs to.
"""
-check_is_fitted(self, 'cluster_centers_')
+check_is_fitted(self)

X = self._check_test_data(X)
x_squared_norms = row_norms(X, squared=True)
@@ -1090,7 +1090,7 @@ def score(self, X, y=None, sample_weight=None):
score : float
Opposite of the value of X on the K-means objective.
"""
-check_is_fitted(self, 'cluster_centers_')
+check_is_fitted(self)

X = self._check_test_data(X)
x_squared_norms = row_norms(X, squared=True)
@@ -1733,7 +1733,7 @@ def predict(self, X, sample_weight=None):
labels : array, shape [n_samples,]
Index of the cluster each sample belongs to.
"""
-check_is_fitted(self, 'cluster_centers_')
+check_is_fitted(self)

X = self._check_test_data(X)
return self._labels_inertia_minibatch(X, sample_weight)[0]
2 changes: 1 addition & 1 deletion sklearn/cluster/mean_shift_.py
@@ -435,6 +435,6 @@ def predict(self, X):
labels : array, shape [n_samples,]
Index of the cluster each sample belongs to.
"""
check_is_fitted(self, "cluster_centers_")
check_is_fitted(self)

return pairwise_distances_argmin(X, self.cluster_centers_)
4 changes: 2 additions & 2 deletions sklearn/compose/_column_transformer.py
@@ -342,7 +342,7 @@ def get_feature_names(self):
feature_names : list of strings
Names of the features produced by transform.
"""
-check_is_fitted(self, 'transformers_')
+check_is_fitted(self)
feature_names = []
for name, trans, _, _ in self._iter(fitted=True):
if trans == 'drop':
@@ -550,7 +550,7 @@ def transform(self, X):
sparse matrices.

"""
-check_is_fitted(self, 'transformers_')
+check_is_fitted(self)
X = _check_X(X)
if hasattr(X, "columns"):
X_feature_names = np.asarray(X.columns)
2 changes: 1 addition & 1 deletion sklearn/compose/_target.py
@@ -221,7 +221,7 @@ def predict(self, X):
Predicted values.

"""
check_is_fitted(self, "regressor_")
check_is_fitted(self)
pred = self.regressor_.predict(X)
if pred.ndim == 1:
pred_trans = self.transformer_.inverse_transform(
4 changes: 2 additions & 2 deletions sklearn/covariance/elliptic_envelope.py
@@ -147,7 +147,7 @@ def decision_function(self, X):
compatibility with other outlier detection algorithms.

"""
-check_is_fitted(self, 'offset_')
+check_is_fitted(self)
negative_mahal_dist = self.score_samples(X)
return negative_mahal_dist - self.offset_

@@ -163,7 +163,7 @@ def score_samples(self, X):
negative_mahal_distances : array-like, shape (n_samples, )
Opposite of the Mahalanobis distances.
"""
-check_is_fitted(self, 'offset_')
+check_is_fitted(self)
return -self.mahalanobis(X)

def predict(self, X):
6 changes: 3 additions & 3 deletions sklearn/cross_decomposition/pls_.py
@@ -398,7 +398,7 @@ def transform(self, X, Y=None, copy=True):
-------
x_scores if Y is not given, (x_scores, y_scores) otherwise.
"""
-check_is_fitted(self, 'x_mean_')
+check_is_fitted(self)
X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
# Normalize
X -= self.x_mean_
@@ -433,7 +433,7 @@ def predict(self, X, copy=True):
This call requires the estimation of a p x q matrix, which may
be an issue in high dimensional space.
"""
-check_is_fitted(self, 'x_mean_')
+check_is_fitted(self)
X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
# Normalize
X -= self.x_mean_
@@ -872,7 +872,7 @@ def transform(self, X, Y=None):
Target vectors, where n_samples is the number of samples and
n_targets is the number of response variables.
"""
-check_is_fitted(self, 'x_mean_')
+check_is_fitted(self)
X = check_array(X, dtype=np.float64)
Xr = (X - self.x_mean_) / self.x_std_
x_scores = np.dot(Xr, self.x_weights_)
2 changes: 1 addition & 1 deletion sklearn/decomposition/base.py
@@ -122,7 +122,7 @@ def transform(self, X):
IncrementalPCA(batch_size=3, n_components=2)
>>> ipca.transform(X) # doctest: +SKIP
"""
-check_is_fitted(self, ['mean_', 'components_'], all_or_any=all)
+check_is_fitted(self)

X = check_array(X)
if self.mean_ is not None:
2 changes: 1 addition & 1 deletion sklearn/decomposition/dict_learning.py
@@ -911,7 +911,7 @@ def transform(self, X):
Transformed data

"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

X = check_array(X)

8 changes: 4 additions & 4 deletions sklearn/decomposition/factor_analysis.py
@@ -261,7 +261,7 @@ def transform(self, X):
X_new : array-like, shape (n_samples, n_components)
The latent variables of X.
"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

X = check_array(X)
Ih = np.eye(len(self.components_))
@@ -285,7 +285,7 @@ def get_covariance(self):
cov : array, shape (n_features, n_features)
Estimated covariance of data.
"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

cov = np.dot(self.components_.T, self.components_)
cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace
@@ -299,7 +299,7 @@ def get_precision(self):
precision : array, shape (n_features, n_features)
Estimated precision of data.
"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

n_features = self.components_.shape[1]

@@ -333,7 +333,7 @@ def score_samples(self, X):
ll : array, shape (n_samples,)
Log-likelihood of each sample under the current model
"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

Xr = X - self.mean_
precision = self.get_precision()
4 changes: 2 additions & 2 deletions sklearn/decomposition/fastica_.py
@@ -574,7 +574,7 @@ def transform(self, X, copy=True):
-------
X_new : array-like, shape (n_samples, n_components)
"""
-check_is_fitted(self, 'mixing_')
+check_is_fitted(self)

X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
if self.whiten:
@@ -597,7 +597,7 @@ def inverse_transform(self, X, copy=True):
-------
X_new : array-like, shape (n_samples, n_features)
"""
-check_is_fitted(self, 'mixing_')
+check_is_fitted(self)

X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES)
X = np.dot(X, self.mixing_.T)
2 changes: 1 addition & 1 deletion sklearn/decomposition/kernel_pca.py
@@ -319,7 +319,7 @@ def transform(self, X):
-------
X_new : array-like, shape (n_samples, n_components)
"""
-check_is_fitted(self, 'X_fit_')
+check_is_fitted(self)

# Compute centered gram matrix between X and training data X_fit_
K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
4 changes: 2 additions & 2 deletions sklearn/decomposition/nmf.py
@@ -1312,7 +1312,7 @@ def transform(self, X):
W : array, shape (n_samples, n_components)
Transformed data
"""
-check_is_fitted(self, 'n_components_')
+check_is_fitted(self)

W, _, n_iter_ = non_negative_factorization(
X=X, W=None, H=self.components_, n_components=self.n_components_,
@@ -1339,5 +1339,5 @@ def inverse_transform(self, W):

.. versionadded:: 0.18
"""
-check_is_fitted(self, 'n_components_')
+check_is_fitted(self)
return np.dot(W, self.components_)
4 changes: 2 additions & 2 deletions sklearn/decomposition/online_lda.py
@@ -594,7 +594,7 @@ def _unnormalized_transform(self, X):
doc_topic_distr : shape=(n_samples, n_components)
Document topic distribution for X.
"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

# make sure feature size is the same in fitted model and in X
X = self._check_non_neg_array(X, "LatentDirichletAllocation.transform")
@@ -748,7 +748,7 @@ def _perplexity_precomp_distr(self, X, doc_topic_distr=None,
score : float
Perplexity score.
"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

X = self._check_non_neg_array(X,
"LatentDirichletAllocation.perplexity")
2 changes: 1 addition & 1 deletion sklearn/decomposition/pca.py
@@ -569,7 +569,7 @@ def score_samples(self, X):
ll : array, shape (n_samples,)
Log-likelihood of each sample under the current model
"""
-check_is_fitted(self, 'mean_')
+check_is_fitted(self)

X = check_array(X)
Xr = X - self.mean_
2 changes: 1 addition & 1 deletion sklearn/decomposition/sparse_pca.py
@@ -221,7 +221,7 @@ def transform(self, X):
X_new : array, shape (n_samples, n_components)
Transformed data.
"""
-check_is_fitted(self, 'components_')
+check_is_fitted(self)

X = check_array(X)
X = X - self.mean_
6 changes: 3 additions & 3 deletions sklearn/discriminant_analysis.py
@@ -505,7 +505,7 @@ def transform(self, X):
if self.solver == 'lsqr':
raise NotImplementedError("transform not implemented for 'lsqr' "
"solver (use 'svd' or 'eigen').")
-check_is_fitted(self, ['xbar_', 'scalings_'], all_or_any=any)
+check_is_fitted(self)

X = check_array(X)
if self.solver == 'svd':
@@ -528,7 +528,7 @@ def predict_proba(self, X):
C : array, shape (n_samples, n_classes)
Estimated probabilities.
"""
-check_is_fitted(self, 'classes_')
+check_is_fitted(self)

decision = self.decision_function(X)
if self.classes_.size == 2:
@@ -704,7 +704,7 @@ def fit(self, X, y):
return self

def _decision_function(self, X):
-check_is_fitted(self, 'classes_')
+check_is_fitted(self)

X = check_array(X)
norm2 = []
6 changes: 3 additions & 3 deletions sklearn/dummy.py
@@ -171,7 +171,7 @@ def predict(self, X):
y : array, shape = [n_samples] or [n_samples, n_outputs]
Predicted target values for X.
"""
-check_is_fitted(self, 'classes_')
+check_is_fitted(self)

# numpy random_state expects Python int and not long as size argument
# under Windows
@@ -249,7 +249,7 @@ def predict_proba(self, X):
the model, where classes are ordered arithmetically, for each
output.
"""
-check_is_fitted(self, 'classes_')
+check_is_fitted(self)

# numpy random_state expects Python int and not long as size argument
# under Windows
@@ -498,7 +498,7 @@ def predict(self, X, return_std=False):
y_std : array, shape = [n_samples] or [n_samples, n_outputs]
Standard deviation of predictive distribution of query points.
"""
check_is_fitted(self, "constant_")
check_is_fitted(self)
n_samples = _num_samples(X)

y = np.full((n_samples, self.n_outputs_), self.constant_,