From ae15a0dbb51d87238b18e0bff0a99452868adbdf Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 27 Jan 2023 20:47:22 +0100 Subject: [PATCH 1/4] FIX make IsotonicRegression predict NumPy array --- doc/whats_new/v1.2.rst | 33 +++++++++++++++++++++++++++++ sklearn/isotonic.py | 38 ++++++++++++++++++---------------- sklearn/tests/test_isotonic.py | 22 ++++++++++++++++++++ 3 files changed, 75 insertions(+), 18 deletions(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 3dd5edfceea86..26649984991b1 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -2,6 +2,39 @@ .. currentmodule:: sklearn +.. _changes_1_2_2: + +Version 1.2.2 +============= + +**In development** + +The following estimators and functions, when fit with the same data and +parameters, may produce different models from the previous version. This often +occurs due to changes in the modelling logic (bug fixes or enhancements), or in +random sampling procedures. + +Changed models +-------------- + +- + +Changes impacting all modules +----------------------------- + +- + +Changelog +--------- + +:mod:`sklearn.isotonic` +....................... + +- |Fix| Fixes a bug in :class:`isotonic.IsotonicRegression` where + :meth:`isotonic.IsotonicRegression.predict` would return a pandas DataFrame + when `set_output` would be fixed to `"pandas"`. + :pr:`xxx` by :user:`Guillaume Lemaitre `. + .. _changes_1_2_1: Version 1.2.1 diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index b05e595368808..a32b3738bd050 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -360,23 +360,7 @@ def fit(self, X, y, sample_weight=None): self._build_f(X, y) return self - def transform(self, T): - """Transform new data by linear interpolation. - - Parameters - ---------- - T : array-like of shape (n_samples,) or (n_samples, 1) - Data to transform. - - .. versionchanged:: 0.24 - Also accepts 2d array with 1 feature. 
- - Returns - ------- - y_pred : ndarray of shape (n_samples,) - The transformed data. - """ - + def _transform(self, T): if hasattr(self, "X_thresholds_"): dtype = self.X_thresholds_.dtype else: @@ -397,6 +381,24 @@ def transform(self, T): return res + def transform(self, T): + """Transform new data by linear interpolation. + + Parameters + ---------- + T : array-like of shape (n_samples,) or (n_samples, 1) + Data to transform. + + .. versionchanged:: 0.24 + Also accepts 2d array with 1 feature. + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + The transformed data. + """ + return self._transform(T) + def predict(self, T): """Predict new data by linear interpolation. @@ -410,7 +412,7 @@ def predict(self, T): y_pred : ndarray of shape (n_samples,) Transformed data. """ - return self.transform(T) + return self._transform(T) # We implement get_feature_names_out here instead of using # `ClassNamePrefixFeaturesOutMixin`` because `input_features` are ignored. diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index 7c9dad8d1adb9..bcc26a294ebcc 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -5,6 +5,7 @@ import pytest +import sklearn from sklearn.datasets import make_regression from sklearn.isotonic import ( check_increasing, @@ -680,3 +681,24 @@ def test_get_feature_names_out(shape): assert isinstance(names, np.ndarray) assert names.dtype == object assert_array_equal(["isotonicregression0"], names) + + +def test_isotonic_regression_output_predict(): + """Check that `predict` does return the expected output type. + + We need to check that `transform` will output a DataFrame and `predict` a + NumPy array when we set `transform_output` to `pandas`. 
+ + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/25499 + """ + pd = pytest.importorskip("pandas") + X, y = make_regression(n_samples=10, n_features=1, random_state=42) + regressor = IsotonicRegression() + with sklearn.config_context(transform_output="pandas"): + regressor.fit(X, y) + X_trans = regressor.transform(X) + y_pred = regressor.predict(X) + + assert isinstance(X_trans, pd.DataFrame) + assert isinstance(y_pred, np.ndarray) From 212b119a71192ef5be96592d9e504d40425e1d64 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 27 Jan 2023 20:49:46 +0100 Subject: [PATCH 2/4] DOC update pr number --- doc/whats_new/v1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 26649984991b1..04014f8ee5697 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -33,7 +33,7 @@ Changelog - |Fix| Fixes a bug in :class:`isotonic.IsotonicRegression` where :meth:`isotonic.IsotonicRegression.predict` would return a pandas DataFrame when `set_output` would be fixed to `"pandas"`. - :pr:`xxx` by :user:`Guillaume Lemaitre `. + :pr:`25500` by :user:`Guillaume Lemaitre `. .. _changes_1_2_1: From 88b820571b5401f472112cb96c17aaffddd290f0 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 30 Jan 2023 11:16:50 +0100 Subject: [PATCH 3/4] Update doc/whats_new/v1.2.rst Co-authored-by: Thomas J. Fan --- doc/whats_new/v1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 04014f8ee5697..c572acf49370c 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -32,7 +32,7 @@ Changelog - |Fix| Fixes a bug in :class:`isotonic.IsotonicRegression` where :meth:`isotonic.IsotonicRegression.predict` would return a pandas DataFrame - when `set_output` would be fixed to `"pandas"`. + when the global configuration sets `transform_output="pandas"`. :pr:`25500` by :user:`Guillaume Lemaitre `. 
.. _changes_1_2_1: From ddbe61b1cd3b65cd9cd25586fe138b3b06b23366 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 30 Jan 2023 11:22:40 +0100 Subject: [PATCH 4/4] DOC add docstring for _transform --- sklearn/isotonic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index a32b3738bd050..24d62dfe1c69a 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -361,6 +361,15 @@ def fit(self, X, y, sample_weight=None): return self def _transform(self, T): + """`_transform` is called by both `transform` and `predict` methods. + + Since `transform` is wrapped to output arrays of specific types (e.g. + NumPy arrays, pandas DataFrame), we cannot make `predict` call `transform` + directly. + + The above behaviour could be changed in the future, if we decide to output + other types of arrays when calling `predict`. + """ if hasattr(self, "X_thresholds_"): dtype = self.X_thresholds_.dtype else: