diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 5b0b753f0f294..305648dbdcfc9 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -32,6 +32,17 @@ Changelog - |Fix| Fixed a bug in :class:`decomposition.KernelPCA`'s ``inverse_transform``. :pr:`19732` by :user:`Kei Ishikawa `. +:mod:`sklearn.gaussian_process` +............................... + +- |Fix| Avoid division by zero when scaling a constant target in + :class:`gaussian_process.GaussianProcessRegressor`. It was due to a std. dev. + equal to 0. Now, such a case is detected and the std. dev. is set to 1, + avoiding a division by zero and thus the presence of NaN values in the + normalized target. + :pr:`19703` by :user:`sobkevich`, :user:`Boris Villazón-Terrazas ` + and :user:`Alexandr Fonari `. + :mod:`sklearn.linear_model` ........................... diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 4e8814dd69951..8f9575ffe42df 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -14,6 +14,7 @@ from ..base import BaseEstimator, RegressorMixin, clone from ..base import MultiOutputMixin from .kernels import RBF, ConstantKernel as C +from ..preprocessing._data import _handle_zeros_in_scale from ..utils import check_random_state from ..utils.optimize import _check_optimize_result from ..utils.validation import _deprecate_positional_args @@ -197,7 +198,9 @@ def fit(self, X, y): # Normalize target value if self.normalize_y: self._y_train_mean = np.mean(y, axis=0) - self._y_train_std = np.std(y, axis=0) + self._y_train_std = _handle_zeros_in_scale( + np.std(y, axis=0), copy=False + ) # Remove mean and make unit variance y = (y - self._y_train_mean) / self._y_train_std diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index a5bfa05c47313..440e421cb95cc 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ 
-546,3 +546,26 @@ def test_bound_check_fixed_hyperparameter(): periodicity_bounds="fixed") # seasonal component kernel = k1 + k2 GaussianProcessRegressor(kernel=kernel).fit(X, y) + + +# FIXME: we should test for multitargets as well. However, GPR is broken: +# see: https://github.com/scikit-learn/scikit-learn/pull/19706 +@pytest.mark.parametrize('kernel', kernels) +def test_constant_target(kernel): + """Check that the std. dev. is set to 1 when normalizing a constant + target. + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/18318 + NaN values were assigned to the target when scaling due to a null + std. dev. with a constant target. + """ + y_constant = np.ones(X.shape[0], dtype=np.float64) + + gpr = GaussianProcessRegressor(kernel=kernel, normalize_y=True) + gpr.fit(X, y_constant) + assert gpr._y_train_std == pytest.approx(1.0) + + y_pred, y_cov = gpr.predict(X, return_cov=True) + assert_allclose(y_pred, y_constant) + # set atol because we compare to zero + assert_allclose(np.diag(y_cov), 0., atol=1e-9)