Skip to content

Commit c00a9e9

Browse files
thomasjpfan authored and ogrisel committed
FIX Non-fit methods no longer raise UserWarning for valid dataframes (#21199)
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
1 parent bb0dcc7 commit c00a9e9

File tree

8 files changed

+40
-23
lines changed

8 files changed

+40
-23
lines changed

doc/whats_new/v1.0.rst

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,23 @@
55
.. _changes_1_0_1:
66

77
Version 1.0.1
8-
=============
98

109
**In Development**
1110

1211
Changelog
1312
---------
1413

14+
Fixed models
15+
------------
16+
17+
- |Fix| Non-fit methods in the following classes do not raise a UserWarning
18+
when fitted on DataFrames with valid feature names:
19+
:class:`covariance.EllipticEnvelope`, :class:`ensemble.IsolationForest`,
20+
:class:`ensemble.AdaBoostClassifier`, :class:`neighbors.KNeighborsClassifier`,
21+
:class:`neighbors.KNeighborsRegressor`,
22+
:class:`neighbors.RadiusNeighborsClassifier`,
23+
:class:`neighbors.RadiusNeighborsRegressor`. :pr:`21199` by `Thomas Fan`_.
24+
1525
:mod:`sklearn.calibration`
1626
..........................
1727

@@ -25,6 +35,17 @@ Changelog
2535
the Bayesian priors.
2636
:pr:`21179` by :user:`Guillaume Lemaitre <glemaitre>`.
2737

38+
:mod:`sklearn.neighbors`
39+
........................
40+
41+
- |Fix| :class:`neighbors.KNeighborsClassifier`,
42+
:class:`neighbors.KNeighborsRegressor`,
43+
:class:`neighbors.RadiusNeighborsClassifier`,
44+
:class:`neighbors.RadiusNeighborsRegressor` with `metric="precomputed"` raise
45+
an error for `bsr` and `dok` sparse matrices in methods: `fit`, `kneighbors`
46+
and `radius_neighbors`, due to handling of explicit zeros in `bsr` and `dok`
47+
:term:`sparse graph` formats. :pr:`21199` by `Thomas Fan`_.
48+
2849
.. _changes_1_0:
2950

3051
Version 1.0.0

sklearn/covariance/_elliptic_envelope.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,6 @@ def score_samples(self, X):
215215
Opposite of the Mahalanobis distances.
216216
"""
217217
check_is_fitted(self)
218-
X = self._validate_data(X, reset=False)
219218
return -self.mahalanobis(X)
220219

221220
def predict(self, X):

sklearn/ensemble/_iforest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,9 @@ def predict(self, X):
337337
be considered as an inlier according to the fitted model.
338338
"""
339339
check_is_fitted(self)
340-
X = self._validate_data(X, accept_sparse="csr", reset=False)
341-
is_inlier = np.ones(X.shape[0], dtype=int)
342-
is_inlier[self.decision_function(X) < 0] = -1
340+
decision_func = self.decision_function(X)
341+
is_inlier = np.ones_like(decision_func, dtype=int)
342+
is_inlier[decision_func < 0] = -1
343343
return is_inlier
344344

345345
def decision_function(self, X):

sklearn/ensemble/_weight_boosting.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -676,8 +676,6 @@ def predict(self, X):
676676
y : ndarray of shape (n_samples,)
677677
The predicted classes.
678678
"""
679-
X = self._check_X(X)
680-
681679
pred = self.decision_function(X)
682680

683681
if self.n_classes_ == 2:
@@ -852,8 +850,6 @@ def predict_proba(self, X):
852850
outputs is the same of that of the :term:`classes_` attribute.
853851
"""
854852
check_is_fitted(self)
855-
X = self._check_X(X)
856-
857853
n_classes = self.n_classes_
858854

859855
if n_classes == 1:
@@ -886,7 +882,6 @@ def staged_predict_proba(self, X):
886882
The class probabilities of the input samples. The order of
887883
outputs is the same of that of the :term:`classes_` attribute.
888884
"""
889-
X = self._check_X(X)
890885

891886
n_classes = self.n_classes_
892887

@@ -912,7 +907,6 @@ def predict_log_proba(self, X):
912907
The class probabilities of the input samples. The order of
913908
outputs is the same of that of the :term:`classes_` attribute.
914909
"""
915-
X = self._check_X(X)
916910
return np.log(self.predict_proba(X))
917911

918912

sklearn/neighbors/_classification.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,6 @@ def predict(self, X):
211211
y : ndarray of shape (n_queries,) or (n_queries, n_outputs)
212212
Class labels for each data sample.
213213
"""
214-
X = self._validate_data(X, accept_sparse="csr", reset=False)
215-
216214
neigh_dist, neigh_ind = self.kneighbors(X)
217215
classes_ = self.classes_
218216
_y = self._y
@@ -255,8 +253,6 @@ def predict_proba(self, X):
255253
The class probabilities of the input samples. Classes are ordered
256254
by lexicographic order.
257255
"""
258-
X = self._validate_data(X, accept_sparse="csr", reset=False)
259-
260256
neigh_dist, neigh_ind = self.kneighbors(X)
261257

262258
classes_ = self.classes_
@@ -271,7 +267,7 @@ def predict_proba(self, X):
271267
if weights is None:
272268
weights = np.ones_like(neigh_ind)
273269

274-
all_rows = np.arange(X.shape[0])
270+
all_rows = np.arange(n_queries)
275271
probabilities = []
276272
for k, classes_k in enumerate(classes_):
277273
pred_labels = _y[:, k][neigh_ind]
@@ -614,7 +610,6 @@ def predict_proba(self, X):
614610
by lexicographic order.
615611
"""
616612

617-
X = self._validate_data(X, accept_sparse="csr", reset=False)
618613
n_queries = _num_samples(X)
619614

620615
neigh_dist, neigh_ind = self.radius_neighbors(X)

sklearn/neighbors/_regression.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,6 @@ def predict(self, X):
226226
y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int
227227
Target values.
228228
"""
229-
X = self._validate_data(X, accept_sparse="csr", reset=False)
230-
231229
neigh_dist, neigh_ind = self.kneighbors(X)
232230

233231
weights = _get_weights(neigh_dist, self.weights)
@@ -436,8 +434,6 @@ def predict(self, X):
436434
dtype=double
437435
Target values.
438436
"""
439-
X = self._validate_data(X, accept_sparse="csr", reset=False)
440-
441437
neigh_dist, neigh_ind = self.radius_neighbors(X)
442438

443439
weights = _get_weights(neigh_dist, self.weights)

sklearn/neighbors/tests/test_neighbors.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,12 @@ def test_kneighbors_regressor_sparse(
10881088
assert np.mean(knn.predict(X2).round() == y) > 0.95
10891089

10901090
X2_pre = sparsev(pairwise_distances(X, metric="euclidean"))
1091-
assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95
1091+
if sparsev in {dok_matrix, bsr_matrix}:
1092+
msg = "not supported due to its handling of explicit zeros"
1093+
with pytest.raises(TypeError, match=msg):
1094+
knn_pre.predict(X2_pre)
1095+
else:
1096+
assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95
10921097

10931098

10941099
def test_neighbors_iris():

sklearn/utils/estimator_checks.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3779,7 +3779,14 @@ def check_dataframe_column_names_consistency(name, estimator_orig):
37793779
check_methods.append((method, callable_method))
37803780

37813781
for _, method in check_methods:
3782-
method(X) # works
3782+
with warnings.catch_warnings():
3783+
warnings.filterwarnings(
3784+
"error",
3785+
message="X does not have valid feature names",
3786+
category=UserWarning,
3787+
module="sklearn",
3788+
)
3789+
method(X) # works without UserWarning for valid features
37833790

37843791
invalid_names = [
37853792
(names[::-1], "Feature names must be in the same order as they were in fit."),

0 commit comments

Comments
 (0)