Skip to content

FIX GaussianProcessRegressor(normalize_y=True).predict(X, return_cov=True) #19706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
41 changes: 35 additions & 6 deletions sklearn/gaussian_process/_gpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,13 @@ def fit(self, X, y):
y = (y - self._y_train_mean) / self._y_train_std

else:
self._y_train_mean = np.zeros(1)
self._y_train_std = 1
if hasattr(y[0], "__len__"):
self._y_train_mean = np.zeros(y.shape[1])
self._y_train_std = np.ones(y.shape[1])
else:
self._y_train_mean = np.zeros(1)
self._y_train_std = 1


if np.iterable(self.alpha) \
and self.alpha.shape[0] != y.shape[0]:
Expand Down Expand Up @@ -305,11 +310,12 @@ def predict(self, X, return_std=False, return_cov=False):
y_mean : ndarray of shape (n_samples, [n_output_dims])
Mean of predictive distribution a query points.

y_std : ndarray of shape (n_samples,), optional
y_std : ndarray of shape (n_samples, [n_output_dims]), optional
Standard deviation of predictive distribution at query points.
Only returned when `return_std` is True.

y_cov : ndarray of shape (n_samples, n_samples), optional
y_cov : ndarray of shape (n_samples, n_samples, [n_output_dims]),
optional
Covariance of joint predictive distribution a query points.
Only returned when `return_cov` is True.
"""
Expand Down Expand Up @@ -352,7 +358,23 @@ def predict(self, X, return_std=False, return_cov=False):
y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6

# undo normalisation
y_cov = y_cov * self._y_train_std**2

# for multitarget data:
if hasattr(self._y_train_std, "__len__"):
y_cov_x = y_cov.shape[0]
y_cov_y = y_cov.shape[1]
y_cov_copy = y_cov.reshape((y_cov_x, y_cov_y, 1))
y_cov = np.zeros((y_cov_x, y_cov_x,
self._y_train_std.shape[0]))
idx = 0
for line in y_cov_copy:
line = line * self._y_train_std**2
y_cov[idx] = line
idx += 1

# for single target data
else:
y_cov = y_cov * self._y_train_std**2

return y_mean, y_cov
elif return_std:
Expand All @@ -378,7 +400,14 @@ def predict(self, X, return_std=False, return_cov=False):
y_var[y_var_negative] = 0.0

# undo normalisation
y_var = y_var * self._y_train_std**2

# for multitarget data
if hasattr(self._y_train_std, "__len__"):
y_var = y_var.reshape((-1, 1))
y_var = y_var * self._y_train_std**2
# for single target data
else:
y_var = y_var * self._y_train_std**2

return y_mean, np.sqrt(y_var)
else:
Expand Down
7 changes: 5 additions & 2 deletions sklearn/gaussian_process/tests/test_gpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,11 @@ def test_y_multioutput():
assert_almost_equal(y_pred_1d, y_pred_2d[:, 1] / 2)

# Standard deviation and covariance do not depend on output
assert_almost_equal(y_std_1d, y_std_2d)
assert_almost_equal(y_cov_1d, y_cov_2d)
assert_almost_equal(y_std_1d, y_std_2d[:, 0])
assert_almost_equal(y_std_1d, y_std_2d[:, 1])

assert_almost_equal(y_cov_1d, y_cov_2d[:, :, 0])
assert_almost_equal(y_cov_1d, y_cov_2d[:, :, 1])

y_sample_1d = gpr.sample_y(X2, n_samples=10)
y_sample_2d = gpr_2d.sample_y(X2, n_samples=10)
Expand Down