Skip to content

Commit 7eff50e

Browse files
committed
much faster isotonic regression prediction (involved re-setting interpolation to linear)
1 parent 98c6edf commit 7eff50e

File tree

3 files changed

+87
-8
lines changed

3 files changed

+87
-8
lines changed

doc/whats_new.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ Enhancements
8888
- Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`.
8989
By `Bernardo Stein`_.
9090

91+
- Prediction of out-of-sample events with Isotonic Regression is now much
92+
faster (over 1000x in tests with synthetic data). By `Jonathan Arfa`_.
93+
9194

9295
Bug fixes
9396
.........
@@ -127,6 +130,9 @@ API changes summary
127130
- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
128131
Use ``loss`` instead. By `Manoj Kumar`_.
129132

133+
- Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
134+
:class:`isotonic.IsotonicRegression`. By `Jonathan Arfa`_.
135+
130136

131137
.. _changes_0_17_1:
132138

@@ -4037,3 +4043,5 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
40374043
.. _Imaculate: https://github.com/Imaculate
40384044

40394045
.. _Bernardo Stein: https://github.com/DanielSidhion
4046+
4047+
.. _Jonathan Arfa: https://github.com/jarfa

sklearn/isotonic.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from scipy import interpolate
88
from scipy.stats import spearmanr
99
from .base import BaseEstimator, TransformerMixin, RegressorMixin
10-
from .utils import as_float_array, check_array, check_consistent_length
10+
from .utils import as_float_array, check_array, check_consistent_length, deprecated
1111
from .utils.fixes import astype
1212
from ._isotonic import _isotonic_regression, _make_unique
1313
import warnings
@@ -234,6 +234,32 @@ def __init__(self, y_min=None, y_max=None, increasing=True,
234234
self.increasing = increasing
235235
self.out_of_bounds = out_of_bounds
236236

237+
@property
238+
@deprecated("Attribute ``X_`` is deprecated in version 0.18 and will be removed in version 0.20.")
239+
def X_(self):
240+
return self._X_
241+
242+
@X_.setter
243+
def X_(self, value):
244+
self._X_ = value
245+
246+
@X_.deleter
247+
def X_(self):
248+
del self._X_
249+
250+
@property
251+
@deprecated("Attribute ``y_`` is deprecated in version 0.18 and will be removed in version 0.20.")
252+
def y_(self):
253+
return self._y_
254+
255+
@y_.setter
256+
def y_(self, value):
257+
self._y_ = value
258+
259+
@y_.deleter
260+
def y_(self):
261+
del self._y_
262+
237263
def _check_fit_data(self, X, y, sample_weight=None):
238264
if len(X.shape) != 1:
239265
raise ValueError("X should be a 1d array")
@@ -252,7 +278,7 @@ def _build_f(self, X, y):
252278
# single y, constant prediction
253279
self.f_ = lambda x: y.repeat(x.shape)
254280
else:
255-
self.f_ = interpolate.interp1d(X, y, kind='slinear',
281+
self.f_ = interpolate.interp1d(X, y, kind='linear',
256282
bounds_error=bounds_error)
257283

258284
def _build_y(self, X, y, sample_weight):
@@ -282,8 +308,8 @@ def _build_y(self, X, y, sample_weight):
282308
X, y, sample_weight = [astype(array[order], np.float64, copy=False)
283309
for array in [X, y, sample_weight]]
284310
unique_X, unique_y, unique_sample_weight = _make_unique(X, y, sample_weight)
285-
self.X_ = unique_X
286-
self.y_ = isotonic_regression(unique_y, unique_sample_weight, self.y_min,
311+
self._X_ = unique_X
312+
self._y_ = isotonic_regression(unique_y, unique_sample_weight, self.y_min,
287313
self.y_max, increasing=self.increasing_)
288314

289315
return order_inv
@@ -317,11 +343,24 @@ def fit(self, X, y, sample_weight=None):
317343
self._build_y(X, y, sample_weight)
318344

319345
# Handle the left and right bounds on X
320-
self.X_min_ = np.min(self.X_)
321-
self.X_max_ = np.max(self.X_)
346+
self.X_min_ = np.min(self._X_)
347+
self.X_max_ = np.max(self._X_)
348+
349+
# Remove unnecessary points for faster prediction
350+
keep_data = np.ones((len(self._y_),), dtype=bool)
351+
# Aside from the first and last points, remove every point whose y value
352+
# is equal to that of both the point before it and the point after it.
353+
keep_data[1:-1] = np.logical_or(
354+
np.not_equal(self._y_[1:-1], self._y_[:-2]),
355+
np.not_equal(self._y_[1:-1], self._y_[2:])
356+
)
357+
# We're keeping self.X_ and self.y_ around for backwards compatibility,
358+
# but they should be considered deprecated.
359+
self._necessary_X_ = self._X_[keep_data]
360+
self._necessary_y_ = self._y_[keep_data]
322361

323362
# Build f_
324-
self._build_f(self.X_, self.y_)
363+
self._build_f(self._necessary_X_, self._necessary_y_)
325364

326365
return self
327366

@@ -381,4 +420,4 @@ def __setstate__(self, state):
381420
We need to rebuild the interpolation function.
382421
"""
383422
self.__dict__.update(state)
384-
self._build_f(self.X_, self.y_)
423+
self._build_f(self._necessary_X_, self._necessary_y_)

sklearn/tests/test_isotonic.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,3 +346,35 @@ def test_isotonic_zero_weight_loop():
346346

347347
# This will hang in failure case.
348348
regression.fit(x, y, sample_weight=w)
349+
350+
351+
def test_fast_predict():
352+
# test that the faster-prediction change (https://github.com/scikit-learn/scikit-learn/pull/6206)
353+
# doesn't affect out-of-sample predictions.
354+
rng = np.random.RandomState(123)
355+
N = 10**3
356+
# X values drawn uniformly from the half-open interval [-10, 10)
357+
training_X = 20.0 * rng.rand(N) - 10
358+
training_Y = np.less(
359+
rng.rand(N),
360+
1.0 / (1.0 + np.exp(-training_X))
361+
).astype('int64')
362+
363+
slow_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")
364+
fast_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")
365+
366+
# fit with ALL input data, not just the necessary points
367+
# this code is taken from the .fit() method, without removing unnecessary points
368+
slow_model._build_y(training_X, training_Y, None)
369+
slow_model.X_min_ = np.min(slow_model._X_)
370+
slow_model.X_max_ = np.max(slow_model._X_)
371+
slow_model._build_f(slow_model._X_, slow_model._y_)
372+
373+
# fit with just the necessary points
374+
fast_model.fit(training_X, training_Y)
375+
376+
pred_X = 20.0 * rng.rand(N) - 10
377+
pred_slow = slow_model.predict(pred_X)
378+
pred_fast = fast_model.predict(pred_X)
379+
380+
assert_array_equal(pred_slow, pred_fast)

0 commit comments

Comments
 (0)