Skip to content

Commit cba99e0

Browse files
glemaitre authored and amueller committed
[MRG+2] FIX: make the deprecation in fit and not init in IsolationForest (#11574)
* FIX make the deprecation in fit instead of init
* DOC mention version changed
* FIX investigate weird failing test
1 parent f819704 commit cba99e0

File tree

3 files changed

+36
-20
lines changed

3 files changed

+36
-20
lines changed

sklearn/ensemble/iforest.py

+15-11
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,10 @@ class IsolationForest(BaseBagging, OutlierMixin):
7070
on the decision function. If 'auto', the decision function threshold is
7171
determined as in the original paper.
7272
73+
.. versionchanged:: 0.20
74+
The default value of ``contamination`` will change from 0.1 in 0.20
75+
to ``'auto'`` in 0.22.
76+
7377
max_features : int or float, optional (default=1.0)
7478
The number of features to draw from X to train each base estimator.
7579
@@ -150,12 +154,6 @@ def __init__(self,
150154
n_jobs=n_jobs,
151155
random_state=random_state,
152156
verbose=verbose)
153-
154-
if contamination == "legacy":
155-
warnings.warn('default contamination parameter 0.1 will change '
156-
'in version 0.22 to "auto". This will change the '
157-
'predict method behavior.',
158-
DeprecationWarning)
159157
self.contamination = contamination
160158

161159
def _set_oob_score(self, X, y):
@@ -178,6 +176,15 @@ def fit(self, X, y=None, sample_weight=None):
178176
-------
179177
self : object
180178
"""
179+
if self.contamination == "legacy":
180+
warnings.warn('default contamination parameter 0.1 will change '
181+
'in version 0.22 to "auto". This will change the '
182+
'predict method behavior.',
183+
FutureWarning)
184+
self._contamination = 0.1
185+
else:
186+
self._contamination = self.contamination
187+
181188
X = check_array(X, accept_sparse=['csc'])
182189
if issparse(X):
183190
# Pre-sort indices to avoid that each individual tree of the
@@ -219,19 +226,16 @@ def fit(self, X, y=None, sample_weight=None):
219226
max_depth=max_depth,
220227
sample_weight=sample_weight)
221228

222-
if self.contamination == "auto":
229+
if self._contamination == "auto":
223230
# 0.5 plays a special role as described in the original paper.
224231
# we take the opposite as we consider the opposite of their score.
225232
self.offset_ = -0.5
226233
# need to save (deprecated) threshold_ in this case:
227234
self._threshold_ = sp.stats.scoreatpercentile(
228235
self.score_samples(X), 100. * 0.1)
229-
elif self.contamination == "legacy": # to be rm in 0.22
230-
self.offset_ = sp.stats.scoreatpercentile(
231-
self.score_samples(X), 100. * 0.1)
232236
else:
233237
self.offset_ = sp.stats.scoreatpercentile(
234-
self.score_samples(X), 100. * self.contamination)
238+
self.score_samples(X), 100. * self._contamination)
235239

236240
return self
237241

sklearn/ensemble/tests/test_iforest.py

+16-4
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ def test_iforest():
6262
**params).fit(X_train).predict(X_test)
6363

6464

65+
@pytest.mark.filterwarnings('ignore:default contamination')
6566
def test_iforest_sparse():
6667
"""Check IForest for various parameter settings on sparse input."""
6768
rng = check_random_state(0)
@@ -89,6 +90,7 @@ def test_iforest_sparse():
8990
assert_array_equal(sparse_results, dense_results)
9091

9192

93+
@pytest.mark.filterwarnings('ignore:default contamination')
9294
def test_iforest_error():
9395
"""Test that it gives proper exception on deficient input."""
9496
X = iris.data
@@ -127,6 +129,7 @@ def test_iforest_error():
127129
assert_raises(ValueError, IsolationForest().fit(X).predict, X[:, 1:])
128130

129131

132+
@pytest.mark.filterwarnings('ignore:default contamination')
130133
def test_recalculate_max_depth():
131134
"""Check max_depth recalculation when max_samples is reset to n_samples"""
132135
X = iris.data
@@ -135,6 +138,7 @@ def test_recalculate_max_depth():
135138
assert_equal(est.max_depth, int(np.ceil(np.log2(X.shape[0]))))
136139

137140

141+
@pytest.mark.filterwarnings('ignore:default contamination')
138142
def test_max_samples_attribute():
139143
X = iris.data
140144
clf = IsolationForest().fit(X)
@@ -150,6 +154,7 @@ def test_max_samples_attribute():
150154
assert_equal(clf.max_samples_, 0.4*X.shape[0])
151155

152156

157+
@pytest.mark.filterwarnings('ignore:default contamination')
153158
def test_iforest_parallel_regression():
154159
"""Check parallel regression."""
155160
rng = check_random_state(0)
@@ -174,6 +179,7 @@ def test_iforest_parallel_regression():
174179
assert_array_almost_equal(y1, y3)
175180

176181

182+
@pytest.mark.filterwarnings('ignore:default contamination')
177183
def test_iforest_performance():
178184
"""Test Isolation Forest performs well"""
179185

@@ -213,13 +219,15 @@ def test_iforest_works():
213219
assert_array_equal(pred, 6 * [1] + 2 * [-1])
214220

215221

222+
@pytest.mark.filterwarnings('ignore:default contamination')
216223
def test_max_samples_consistency():
217224
# Make sure validated max_samples in iforest and BaseBagging are identical
218225
X = iris.data
219226
clf = IsolationForest().fit(X)
220227
assert_equal(clf.max_samples_, clf._max_samples)
221228

222229

230+
@pytest.mark.filterwarnings('ignore:default contamination')
223231
def test_iforest_subsampled_features():
224232
# It tests non-regression for #5732 which failed at predict.
225233
rng = check_random_state(0)
@@ -244,6 +252,7 @@ def test_iforest_average_path_length():
244252
[1., result_one, result_two], decimal=10)
245253

246254

255+
@pytest.mark.filterwarnings('ignore:default contamination')
247256
def test_score_samples():
248257
X_train = [[1, 1], [1, 2], [2, 1]]
249258
clf1 = IsolationForest(contamination=0.1).fit(X_train)
@@ -257,12 +266,15 @@ def test_score_samples():
257266

258267

259268
def test_deprecation():
260-
assert_warns_message(DeprecationWarning,
269+
X = [[0.0], [1.0]]
270+
clf = IsolationForest()
271+
272+
assert_warns_message(FutureWarning,
261273
'default contamination parameter 0.1 will change '
262274
'in version 0.22 to "auto"',
263-
IsolationForest, )
264-
X = [[0.0], [1.0]]
265-
clf = IsolationForest().fit(X)
275+
clf.fit, X)
276+
277+
clf = IsolationForest(contamination='auto').fit(X)
266278
assert_warns_message(DeprecationWarning,
267279
"threshold_ attribute is deprecated in 0.20 and will"
268280
" be removed in 0.22.",

sklearn/linear_model/tests/test_sag.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from sklearn.utils.extmath import row_norms
1818
from sklearn.utils.testing import assert_almost_equal
1919
from sklearn.utils.testing import assert_array_almost_equal
20+
from sklearn.utils.testing import assert_allclose
2021
from sklearn.utils.testing import assert_greater
2122
from sklearn.utils.testing import assert_raise_message
2223
from sklearn.utils.testing import ignore_warnings
@@ -269,7 +270,6 @@ def test_classifier_matching():
269270
assert_array_almost_equal(intercept2, clf.intercept_, decimal=9)
270271

271272

272-
@ignore_warnings
273273
def test_regressor_matching():
274274
n_samples = 10
275275
n_features = 5
@@ -295,10 +295,10 @@ def test_regressor_matching():
295295
dloss=squared_dloss,
296296
fit_intercept=fit_intercept)
297297

298-
assert_array_almost_equal(weights1, clf.coef_, decimal=10)
299-
assert_array_almost_equal(intercept1, clf.intercept_, decimal=10)
300-
assert_array_almost_equal(weights2, clf.coef_, decimal=10)
301-
assert_array_almost_equal(intercept2, clf.intercept_, decimal=10)
298+
assert_allclose(weights1, clf.coef_)
299+
assert_allclose(intercept1, clf.intercept_)
300+
assert_allclose(weights2, clf.coef_)
301+
assert_allclose(intercept2, clf.intercept_)
302302

303303

304304
@ignore_warnings

0 commit comments

Comments
 (0)