From 0eec61b6ce33d02192b85ab29352d2aef0e78236 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Tue, 17 Aug 2021 01:03:44 -0300 Subject: [PATCH 01/21] fix: GaussianProcessRegressor fails to compute y_std when n_targets > 1 predict(X, return_std=True) could not compute y_std when n_targets was greater than 1. This happened because line 415 in file "sklearn/gaussian_process/_gpr.py" computed y_var * self._y_train_std ** 2 using plain element-wise multiplication (a1 * a2). That fails when self._y_train_std has more than one entry (i.e. when n_targets is greater than 1), so the multiplication is now done with np.outer, which scales y_var by the normalization factor of each target (self._y_train_std contains one normalization factor per target). --- sklearn/gaussian_process/_gpr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index d1c46942b0640..48dbefac93d68 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -337,7 +337,7 @@ def predict(self, X, return_std=False, return_cov=False): y_mean : ndarray of shape (n_samples,) or (n_samples, n_targets) Mean of predictive distribution a query points. - y_std : ndarray of shape (n_samples,), optional + y_std : ndarray of shape (n_samples,) or (n_samples, n_targets), optional Standard deviation of predictive distribution at query points. Only returned when `return_std` is True. 
@@ -412,7 +412,7 @@ def predict(self, X, return_std=False, return_cov=False): y_var[y_var_negative] = 0.0 # undo normalisation - y_var = y_var * self._y_train_std ** 2 + y_var = np.outer(y_var, self._y_train_std ** 2) return y_mean, np.sqrt(y_var) else: From e6b7c2eb3666abef1e14e7582310740d86150ccd Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Tue, 24 Aug 2021 21:07:22 -0300 Subject: [PATCH 02/21] fix: Reshape to (n_samples,) instead of (n_samples, 1) if single target --- sklearn/gaussian_process/_gpr.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 48dbefac93d68..633cadc6f8e97 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -414,6 +414,10 @@ def predict(self, X, return_std=False, return_cov=False): # undo normalisation y_var = np.outer(y_var, self._y_train_std ** 2) + # if y_var has shape (n_samples, 1), reshape to (n_samples,) + if y_var.shape[0] == y_var.size: + y_var = y_var.reshape(-1) + return y_mean, np.sqrt(y_var) else: return y_mean From d59a8253b494a2d65d567b46f71b512e1fb0682a Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 02:16:58 -0300 Subject: [PATCH 03/21] feature: Add an entry in doc/whats_new, explaining changes and credits. --- doc/whats_new/v1.0.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 57206230b8d65..f878c376fa615 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -823,6 +823,14 @@ Changelog :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in 1.2. :pr:`20165` by `Thomas Fan`_. +:mod:`sklearn.gaussian_process` +......................... + +- |Fix| Compute 'y_std' properly with multi-target in + :class:`sklearn.gaussian_process.GaussianProcessRegressor`. 
It could not + predict when `return_std` and `normalize_y` where True, in multi-target scene. + :pr:`20761` by :user:`Patrick de C. T. R. Ferreira `. + Code and Documentation Contributors ----------------------------------- From c321c32da0626ac522f73e98e13daaae51bb6746 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 02:56:22 -0300 Subject: [PATCH 04/21] feature: Add a non-regression test for the issue being fixed. --- sklearn/gaussian_process/tests/test_gpr.py | 105 +++++++++++++-------- 1 file changed, 64 insertions(+), 41 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index b641be30a824a..201de617785b5 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -223,9 +223,9 @@ def test_random_starts(): rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) * 2 - 1 y = ( - np.sin(X).sum(axis=1) - + np.sin(3 * X).sum(axis=1) - + rng.normal(scale=0.1, size=n_samples) + np.sin(X).sum(axis=1) + + np.sin(3 * X).sum(axis=1) + + rng.normal(scale=0.1, size=n_samples) ) kernel = C(1.0, (1e-2, 1e2)) * RBF( @@ -411,10 +411,10 @@ def test_gpr_correct_error_message(): kernel = DotProduct() gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0) message = ( - "The kernel, %s, is not returning a " - "positive definite matrix. Try gradually increasing " - "the 'alpha' parameter of your " - "GaussianProcessRegressor estimator." % kernel + "The kernel, %s, is not returning a " + "positive definite matrix. Try gradually increasing " + "the 'alpha' parameter of your " + "GaussianProcessRegressor estimator." 
% kernel ) with pytest.raises(np.linalg.LinAlgError, match=re.escape(message)): gpr.fit(X, y) @@ -483,23 +483,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k1__noise_level is close to the " - "specified upper bound 0.001. " - "Increasing the bound and calling " - "fit again may find a better value." + record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k1__noise_level is close to the " + "specified upper bound 0.001. " + "Increasing the bound and calling " + "fit again may find a better value." ) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k2__length_scale is close to the " - "specified lower bound 1000.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[1].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k2__length_scale is close to the " + "specified lower bound 1000.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) X_tile = np.tile(X, 2) @@ -514,23 +514,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 1 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." 
+ record[1].message.args[0] + == "The optimal value found for " + "dimension 1 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) @@ -620,12 +620,12 @@ def test_gpr_consistency_std_cov_non_invertible_kernel(): ({"kernel": RBF(), "optimizer": "unknown"}, ValueError, "Unknown optimizer"), ({"alpha": np.zeros(100)}, ValueError, "alpha must be a scalar or an array"), ( - { - "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), - "n_restarts_optimizer": 2, - }, - ValueError, - "requires that all bounds are finite", + { + "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), + "n_restarts_optimizer": 2, + }, + ValueError, + "requires that all bounds are finite", ), ], ) @@ -652,3 +652,26 @@ def test_gpr_predict_error(): err_msg = "At most one of return_std or return_cov can be requested." with pytest.raises(RuntimeError, match=err_msg): gpr.predict(X, return_cov=True, return_std=True) + + +def test_y_std_with_multitarget_normalized(): + """ + Regression test for issues #17394 and #18065. + Check if GPR can compute y_std in predict() method when normalize_y==True in + multi-target regression. + """ + X_train = np.random.rand((11, 10)) + y_train = np.random.rand((11, 6)) # 6 target features -> multi-target + X_test = np.random.rand((4, 10)) + + # Generic kernel + kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) + kernel *= kernels.RBF(10.0, (1e-3, 1e3)) + + # normalize_y == True + model = GaussianProcessRegressor(kernel=kernel, + n_restarts_optimizer=10, + alpha=0.1, + normalize_y=True) + model.fit(X_train, y_train) + y_pred, std = model.predict(X_test, return_std=True) From 7acf698bd17b6fb862b08ca3861947d1a1290368 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 03:15:53 -0300 Subject: [PATCH 05/21] fix: Linting error in tests file. 
--- sklearn/gaussian_process/tests/test_gpr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 201de617785b5..bafc54216ad97 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -660,9 +660,9 @@ def test_y_std_with_multitarget_normalized(): Check if GPR can compute y_std in predict() method when normalize_y==True in multi-target regression. """ - X_train = np.random.rand((11, 10)) + x_train = np.random.rand((11, 10)) y_train = np.random.rand((11, 6)) # 6 target features -> multi-target - X_test = np.random.rand((4, 10)) + x_test = np.random.rand((4, 10)) # Generic kernel kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) @@ -673,5 +673,5 @@ def test_y_std_with_multitarget_normalized(): n_restarts_optimizer=10, alpha=0.1, normalize_y=True) - model.fit(X_train, y_train) - y_pred, std = model.predict(X_test, return_std=True) + model.fit(x_train, y_train) + y_pred, std = model.predict(x_test, return_std=True) From acc4458572060789c8f6c3c74c9db7307d04fc49 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 03:25:35 -0300 Subject: [PATCH 06/21] fix: Persistent linting error in tests file. --- sklearn/gaussian_process/tests/test_gpr.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index bafc54216ad97..d5b73568fd9a5 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -657,21 +657,19 @@ def test_gpr_predict_error(): def test_y_std_with_multitarget_normalized(): """ Regression test for issues #17394 and #18065. - Check if GPR can compute y_std in predict() method when normalize_y==True in - multi-target regression. 
+ Check if GPR can compute y_std in predict() method when normalize_y==True + in multi-target regression. """ - x_train = np.random.rand((11, 10)) - y_train = np.random.rand((11, 6)) # 6 target features -> multi-target - x_test = np.random.rand((4, 10)) + X_train = np.random.rand((11, 10)) + # 6 target features -> multi-target + y_train = np.random.rand((11, 6)) + X_test = np.random.rand((4, 10)) # Generic kernel kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) kernel *= kernels.RBF(10.0, (1e-3, 1e3)) # normalize_y == True - model = GaussianProcessRegressor(kernel=kernel, - n_restarts_optimizer=10, - alpha=0.1, - normalize_y=True) - model.fit(x_train, y_train) - y_pred, std = model.predict(x_test, return_std=True) + model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, alpha=0.1, normalize_y=True) + model.fit(X_train, y_train) + y_pred, std = model.predict(X_test, return_std=True) From 7d0d97a40a4b885fc37892fc52decc01eb41fd4f Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 03:30:29 -0300 Subject: [PATCH 07/21] checkpoint: Testing linting demands --- sklearn/gaussian_process/tests/test_gpr.py | 25 ++-------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index d5b73568fd9a5..c182e7ded4f04 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -45,7 +45,7 @@ def f(x): + C(1e-5, (1e-5, 1e2)), C(0.1, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) + C(1e-5, (1e-5, 1e2)), -] + ] non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel] @@ -179,7 +179,7 @@ def test_sample_statistics(kernel): np.diag(y_cov) / np.diag(y_cov).max(), np.var(samples, 1) / np.diag(y_cov).max(), 1, - ) + ) def test_no_optimizer(): @@ -652,24 +652,3 @@ def test_gpr_predict_error(): err_msg = "At most one of return_std or 
return_cov can be requested." with pytest.raises(RuntimeError, match=err_msg): gpr.predict(X, return_cov=True, return_std=True) - - -def test_y_std_with_multitarget_normalized(): - """ - Regression test for issues #17394 and #18065. - Check if GPR can compute y_std in predict() method when normalize_y==True - in multi-target regression. - """ - X_train = np.random.rand((11, 10)) - # 6 target features -> multi-target - y_train = np.random.rand((11, 6)) - X_test = np.random.rand((4, 10)) - - # Generic kernel - kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) - kernel *= kernels.RBF(10.0, (1e-3, 1e3)) - - # normalize_y == True - model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, alpha=0.1, normalize_y=True) - model.fit(X_train, y_train) - y_pred, std = model.predict(X_test, return_std=True) From eecb8bc9dab2adc1b5bee0c1758291c49207e1ec Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 03:36:29 -0300 Subject: [PATCH 08/21] feature: Added whats_new entry and non-regressive test. ToDo: Fix linting error. --- sklearn/gaussian_process/tests/test_gpr.py | 28 ++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index c182e7ded4f04..70f4c40683aa6 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -45,7 +45,7 @@ def f(x): + C(1e-5, (1e-5, 1e2)), C(0.1, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) + C(1e-5, (1e-5, 1e2)), - ] +] non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel] @@ -179,7 +179,7 @@ def test_sample_statistics(kernel): np.diag(y_cov) / np.diag(y_cov).max(), np.var(samples, 1) / np.diag(y_cov).max(), 1, - ) + ) def test_no_optimizer(): @@ -652,3 +652,27 @@ def test_gpr_predict_error(): err_msg = "At most one of return_std or return_cov can be requested." 
with pytest.raises(RuntimeError, match=err_msg): gpr.predict(X, return_cov=True, return_std=True) + + +def test_y_std_with_multitarget_normalized(): + """ + Regression test for issues #17394 and #18065. + Check if GPR can compute y_std in predict() method when normalize_y==True + in multi-target regression. + """ + X_train = np.random.rand((11, 10)) + # 6 target features -> multi-target + y_train = np.random.rand((11, 6)) + X_test = np.random.rand((4, 10)) + + # Generic kernel + kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) + kernel *= kernels.RBF(10.0, (1e-3, 1e3)) + + # normalize_y == True + model = GaussianProcessRegressor(kernel=kernel, + n_restarts_optimizer=10, + alpha=0.1, + normalize_y=True) + model.fit(X_train, y_train) + y_pred, std = model.predict(X_test, return_std=True) From df652ae3eae3194c1bdbbc50d8c03a7e71105462 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 10:48:59 -0300 Subject: [PATCH 09/21] fix: Fix whats_new formatting. --- doc/whats_new/v1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index f878c376fa615..2fb003adcd68c 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -824,9 +824,9 @@ Changelog 1.2. :pr:`20165` by `Thomas Fan`_. :mod:`sklearn.gaussian_process` -......................... +............................... -- |Fix| Compute 'y_std' properly with multi-target in +- |Fix| Compute `y_std` properly with multi-target in :class:`sklearn.gaussian_process.GaussianProcessRegressor`. It could not predict when `return_std` and `normalize_y` where True, in multi-target scene. :pr:`20761` by :user:`Patrick de C. T. R. Ferreira `. From 937b00a8d0a29fe5e28df97709edf2e907fdbd6e Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 10:56:37 -0300 Subject: [PATCH 10/21] fix: test_gpr.py formatting fixed using black. 
--- sklearn/gaussian_process/tests/test_gpr.py | 128 +++++++++++---------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 70f4c40683aa6..554881840551b 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -223,9 +223,9 @@ def test_random_starts(): rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) * 2 - 1 y = ( - np.sin(X).sum(axis=1) - + np.sin(3 * X).sum(axis=1) - + rng.normal(scale=0.1, size=n_samples) + np.sin(X).sum(axis=1) + + np.sin(3 * X).sum(axis=1) + + rng.normal(scale=0.1, size=n_samples) ) kernel = C(1.0, (1e-2, 1e2)) * RBF( @@ -234,9 +234,7 @@ def test_random_starts(): last_lml = -np.inf for n_restarts_optimizer in range(5): gp = GaussianProcessRegressor( - kernel=kernel, - n_restarts_optimizer=n_restarts_optimizer, - random_state=0, + kernel=kernel, n_restarts_optimizer=n_restarts_optimizer, random_state=0 ).fit(X, y) lml = gp.log_marginal_likelihood(gp.kernel_.theta) assert lml > last_lml - np.finfo(np.float32).eps @@ -385,8 +383,9 @@ def test_custom_optimizer(kernel): # Define a dummy optimizer that simply tests 50 random hyperparameters def optimizer(obj_func, initial_theta, bounds): rng = np.random.RandomState(0) - theta_opt, func_min = initial_theta, obj_func( - initial_theta, eval_gradient=False + theta_opt, func_min = ( + initial_theta, + obj_func(initial_theta, eval_gradient=False), ) for _ in range(50): theta = np.atleast_1d( @@ -411,10 +410,10 @@ def test_gpr_correct_error_message(): kernel = DotProduct() gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0) message = ( - "The kernel, %s, is not returning a " - "positive definite matrix. Try gradually increasing " - "the 'alpha' parameter of your " - "GaussianProcessRegressor estimator." % kernel + "The kernel, %s, is not returning a " + "positive definite matrix. 
Try gradually increasing " + "the 'alpha' parameter of your " + "GaussianProcessRegressor estimator." % kernel ) with pytest.raises(np.linalg.LinAlgError, match=re.escape(message)): gpr.fit(X, y) @@ -483,23 +482,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k1__noise_level is close to the " - "specified upper bound 0.001. " - "Increasing the bound and calling " - "fit again may find a better value." + record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k1__noise_level is close to the " + "specified upper bound 0.001. " + "Increasing the bound and calling " + "fit again may find a better value." ) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k2__length_scale is close to the " - "specified lower bound 1000.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[1].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k2__length_scale is close to the " + "specified lower bound 1000.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) X_tile = np.tile(X, 2) @@ -514,23 +513,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." 
) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 1 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[1].message.args[0] + == "The optimal value found for " + "dimension 1 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) @@ -620,12 +619,12 @@ def test_gpr_consistency_std_cov_non_invertible_kernel(): ({"kernel": RBF(), "optimizer": "unknown"}, ValueError, "Unknown optimizer"), ({"alpha": np.zeros(100)}, ValueError, "alpha must be a scalar or an array"), ( - { - "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), - "n_restarts_optimizer": 2, - }, - ValueError, - "requires that all bounds are finite", + { + "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), + "n_restarts_optimizer": 2, + }, + ValueError, + "requires that all bounds are finite", ), ], ) @@ -655,24 +654,27 @@ def test_gpr_predict_error(): def test_y_std_with_multitarget_normalized(): + """Check that `y_std` is properly computed when `normalize_y=True`. + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/17394 + https://github.com/scikit-learn/scikit-learn/issues/18065 """ - Regression test for issues #17394 and #18065. - Check if GPR can compute y_std in predict() method when normalize_y==True - in multi-target regression. 
- """ - X_train = np.random.rand((11, 10)) - # 6 target features -> multi-target - y_train = np.random.rand((11, 6)) - X_test = np.random.rand((4, 10)) + rng = np.random.RandomState(42) + + n_samples, n_features, n_targets = 12, 10, 6 + + X_train = rng.randn(n_samples, n_features) + y_train = rng.randn(n_samples, n_targets) + X_test = rng.randn(n_samples, n_features) # Generic kernel - kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) - kernel *= kernels.RBF(10.0, (1e-3, 1e3)) - - # normalize_y == True - model = GaussianProcessRegressor(kernel=kernel, - n_restarts_optimizer=10, - alpha=0.1, - normalize_y=True) + kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) * kernels.RBF(10.0, (1e-3, 1e3)) + + model = GaussianProcessRegressor( + kernel=kernel, n_restarts_optimizer=10, alpha=0.1, normalize_y=True + ) model.fit(X_train, y_train) - y_pred, std = model.predict(X_test, return_std=True) + y_pred, y_std = model.predict(X_test, return_std=True) + + assert y_pred.shape == (n_samples, n_targets) + assert y_std.shape == (n_samples, n_targets) From 8a17971901f802058f0b4e35fd1f480ee2b8ad07 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 12:22:04 -0300 Subject: [PATCH 11/21] fix: There was a typo in test function. 
--- sklearn/gaussian_process/tests/test_gpr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 554881840551b..68dc41cc4173b 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -668,7 +668,7 @@ def test_y_std_with_multitarget_normalized(): X_test = rng.randn(n_samples, n_features) # Generic kernel - kernel = kernels.ConstantKernel(1.0, (1e-1, 1e3)) * kernels.RBF(10.0, (1e-3, 1e3)) + kernel = WhiteKernel(1.0, (1e-1, 1e3)) * C(10.0, (1e-3, 1e3)) model = GaussianProcessRegressor( kernel=kernel, n_restarts_optimizer=10, alpha=0.1, normalize_y=True From b8f7f3d485876133efa2cfe0f1d703cdfdf52535 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Thu, 9 Sep 2021 14:13:49 -0300 Subject: [PATCH 12/21] checkpoint: Testing pipeline execution. --- sklearn/gaussian_process/tests/test_gpr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 68dc41cc4173b..fedd4f025856a 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -659,7 +659,7 @@ def test_y_std_with_multitarget_normalized(): https://github.com/scikit-learn/scikit-learn/issues/17394 https://github.com/scikit-learn/scikit-learn/issues/18065 """ - rng = np.random.RandomState(42) + rng = np.random.RandomState(1234) n_samples, n_features, n_targets = 12, 10, 6 From 18871a71946552ee840187d683a9309004721350 Mon Sep 17 00:00:00 2001 From: Patrick Ferreira Date: Sat, 9 Oct 2021 01:28:11 -0300 Subject: [PATCH 13/21] refactor: Reformat sklearn/gaussian_process/tests/test_gpr.py Co-authored-by: Julien Jerphanion --- sklearn/gaussian_process/tests/test_gpr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py 
b/sklearn/gaussian_process/tests/test_gpr.py index fedd4f025856a..8a0e1e7683c96 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -654,7 +654,8 @@ def test_gpr_predict_error(): def test_y_std_with_multitarget_normalized(): - """Check that `y_std` is properly computed when `normalize_y=True`. + """Check the proper normalization of `y_std` in multi-target scene. + Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/17394 https://github.com/scikit-learn/scikit-learn/issues/18065 From 1dc94bdeee60e6d6cafdf39842cf7fda58dd52ba Mon Sep 17 00:00:00 2001 From: Patrick Ferreira Date: Sat, 9 Oct 2021 01:30:41 -0300 Subject: [PATCH 14/21] fix: doc/whats_new/v1.0.rst pattern Co-authored-by: Julien Jerphanion --- doc/whats_new/v1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 2fb003adcd68c..1726d5b56cb91 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -827,8 +827,8 @@ Changelog ............................... - |Fix| Compute `y_std` properly with multi-target in - :class:`sklearn.gaussian_process.GaussianProcessRegressor`. It could not - predict when `return_std` and `normalize_y` where True, in multi-target scene. + :class:`sklearn.gaussian_process.GaussianProcessRegressor` allowing + proper normalization in multi-target scene. :pr:`20761` by :user:`Patrick de C. T. R. Ferreira `. 
Code and Documentation Contributors From 57c0b2643e181d7841b6d2f557b8ef00eb5665cb Mon Sep 17 00:00:00 2001 From: Patrick Ferreira Date: Sat, 9 Oct 2021 01:34:13 -0300 Subject: [PATCH 15/21] refactor: Update sklearn/gaussian_process/_gpr.py Co-authored-by: Julien Jerphanion --- sklearn/gaussian_process/_gpr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 633cadc6f8e97..83107874152bf 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -416,7 +416,7 @@ def predict(self, X, return_std=False, return_cov=False): # if y_var has shape (n_samples, 1), reshape to (n_samples,) if y_var.shape[0] == y_var.size: - y_var = y_var.reshape(-1) + y_var = np.ravel(y_var) return y_mean, np.sqrt(y_var) else: From 6b3d12eb1261fb5367a8387afe09bcb48a9623b4 Mon Sep 17 00:00:00 2001 From: Patrick de Carvalho Tavares Rezende Ferreira Date: Tue, 12 Oct 2021 02:29:40 -0300 Subject: [PATCH 16/21] refactor: Undo Black changes. 
--- sklearn/gaussian_process/tests/test_gpr.py | 95 +++++++++++----------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 8a0e1e7683c96..4142340be8750 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -45,7 +45,7 @@ def f(x): + C(1e-5, (1e-5, 1e2)), C(0.1, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) + C(1e-5, (1e-5, 1e2)), -] + ] non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel] @@ -179,7 +179,7 @@ def test_sample_statistics(kernel): np.diag(y_cov) / np.diag(y_cov).max(), np.var(samples, 1) / np.diag(y_cov).max(), 1, - ) + ) def test_no_optimizer(): @@ -223,9 +223,9 @@ def test_random_starts(): rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) * 2 - 1 y = ( - np.sin(X).sum(axis=1) - + np.sin(3 * X).sum(axis=1) - + rng.normal(scale=0.1, size=n_samples) + np.sin(X).sum(axis=1) + + np.sin(3 * X).sum(axis=1) + + rng.normal(scale=0.1, size=n_samples) ) kernel = C(1.0, (1e-2, 1e2)) * RBF( @@ -234,7 +234,9 @@ def test_random_starts(): last_lml = -np.inf for n_restarts_optimizer in range(5): gp = GaussianProcessRegressor( - kernel=kernel, n_restarts_optimizer=n_restarts_optimizer, random_state=0 + kernel=kernel, + n_restarts_optimizer=n_restarts_optimizer, + random_state=0, ).fit(X, y) lml = gp.log_marginal_likelihood(gp.kernel_.theta) assert lml > last_lml - np.finfo(np.float32).eps @@ -383,9 +385,8 @@ def test_custom_optimizer(kernel): # Define a dummy optimizer that simply tests 50 random hyperparameters def optimizer(obj_func, initial_theta, bounds): rng = np.random.RandomState(0) - theta_opt, func_min = ( - initial_theta, - obj_func(initial_theta, eval_gradient=False), + theta_opt, func_min = initial_theta, obj_func( + initial_theta, eval_gradient=False ) for _ in range(50): theta = np.atleast_1d( @@ -410,10 +411,10 @@ def 
test_gpr_correct_error_message(): kernel = DotProduct() gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0) message = ( - "The kernel, %s, is not returning a " - "positive definite matrix. Try gradually increasing " - "the 'alpha' parameter of your " - "GaussianProcessRegressor estimator." % kernel + "The kernel, %s, is not returning a " + "positive definite matrix. Try gradually increasing " + "the 'alpha' parameter of your " + "GaussianProcessRegressor estimator." % kernel ) with pytest.raises(np.linalg.LinAlgError, match=re.escape(message)): gpr.fit(X, y) @@ -482,23 +483,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k1__noise_level is close to the " - "specified upper bound 0.001. " - "Increasing the bound and calling " - "fit again may find a better value." + record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k1__noise_level is close to the " + "specified upper bound 0.001. " + "Increasing the bound and calling " + "fit again may find a better value." ) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k2__length_scale is close to the " - "specified lower bound 1000.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[1].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k2__length_scale is close to the " + "specified lower bound 1000.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) X_tile = np.tile(X, 2) @@ -513,23 +514,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." 
+ record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 1 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[1].message.args[0] + == "The optimal value found for " + "dimension 1 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) @@ -619,12 +620,12 @@ def test_gpr_consistency_std_cov_non_invertible_kernel(): ({"kernel": RBF(), "optimizer": "unknown"}, ValueError, "Unknown optimizer"), ({"alpha": np.zeros(100)}, ValueError, "alpha must be a scalar or an array"), ( - { - "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), - "n_restarts_optimizer": 2, - }, - ValueError, - "requires that all bounds are finite", + { + "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), + "n_restarts_optimizer": 2, + }, + ValueError, + "requires that all bounds are finite", ), ], ) From c40487d7df5554d3b643292615be0fda31a5a629 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 14 Oct 2021 11:35:55 +0200 Subject: [PATCH 17/21] fix whats new --- doc/whats_new/v1.0.rst | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 47b69470cb94c..fb5a84da81703 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -43,6 +43,14 @@ Fixed models between sparse and dense input. :pr:`21195` by :user:`Jérémie du Boisberranger `. +:mod:`sklearn.gaussian_process` +............................... 
+ +- |Fix| Compute `y_std` properly with multi-target in + :class:`sklearn.gaussian_process.GaussianProcessRegressor` allowing + proper normalization in multi-target scene. + :pr:`20761` by :user:`Patrick de C. T. R. Ferreira `. + :mod:`sklearn.neighbors` ........................ @@ -677,7 +685,7 @@ Changelog Dupre la Tour`_. - |Fix| Decrease the numerical default tolerance in the lobpcg call - in :func:`manifold.spectral_embedding` to prevent numerical instability. + in :func:`manifold.spectral_embedding` to prevent numerical instability. :pr:`21194` by :user:`Andrew Knyazev `. :mod:`sklearn.metrics` @@ -979,13 +987,6 @@ Changelog :func:`~sklearn.utils.check_array` in 1.0 and will raise a `TypeError` in 1.2. :pr:`20165` by `Thomas Fan`_. -:mod:`sklearn.gaussian_process` -............................... - -- |Fix| Compute `y_std` properly with multi-target in - :class:`sklearn.gaussian_process.GaussianProcessRegressor` allowing - proper normalization in multi-target scene. - :pr:`20761` by :user:`Patrick de C. T. R. Ferreira `. - |API| :func:`utils._testing.assert_warns` and :func:`utils._testing.assert_warns_message` are deprecated in 1.0 and will be removed in 1.2. Used `pytest.warns` context manager instead. 
Note that From 25c944b8ee19422b295849ffa2d4d0cd34a99b99 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 14 Oct 2021 11:46:23 +0200 Subject: [PATCH 18/21] black --- sklearn/gaussian_process/tests/test_gpr.py | 86 +++++++++++----------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 4142340be8750..8b3bc0dccd534 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -45,7 +45,7 @@ def f(x): + C(1e-5, (1e-5, 1e2)), C(0.1, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) + C(1e-5, (1e-5, 1e2)), - ] +] non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel] @@ -179,7 +179,7 @@ def test_sample_statistics(kernel): np.diag(y_cov) / np.diag(y_cov).max(), np.var(samples, 1) / np.diag(y_cov).max(), 1, - ) + ) def test_no_optimizer(): @@ -223,9 +223,9 @@ def test_random_starts(): rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) * 2 - 1 y = ( - np.sin(X).sum(axis=1) - + np.sin(3 * X).sum(axis=1) - + rng.normal(scale=0.1, size=n_samples) + np.sin(X).sum(axis=1) + + np.sin(3 * X).sum(axis=1) + + rng.normal(scale=0.1, size=n_samples) ) kernel = C(1.0, (1e-2, 1e2)) * RBF( @@ -411,10 +411,10 @@ def test_gpr_correct_error_message(): kernel = DotProduct() gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0) message = ( - "The kernel, %s, is not returning a " - "positive definite matrix. Try gradually increasing " - "the 'alpha' parameter of your " - "GaussianProcessRegressor estimator." % kernel + "The kernel, %s, is not returning a " + "positive definite matrix. Try gradually increasing " + "the 'alpha' parameter of your " + "GaussianProcessRegressor estimator." 
% kernel ) with pytest.raises(np.linalg.LinAlgError, match=re.escape(message)): gpr.fit(X, y) @@ -483,23 +483,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k1__noise_level is close to the " - "specified upper bound 0.001. " - "Increasing the bound and calling " - "fit again may find a better value." + record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k1__noise_level is close to the " + "specified upper bound 0.001. " + "Increasing the bound and calling " + "fit again may find a better value." ) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "k2__length_scale is close to the " - "specified lower bound 1000.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[1].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "k2__length_scale is close to the " + "specified lower bound 1000.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) X_tile = np.tile(X, 2) @@ -514,23 +514,23 @@ def test_warning_bounds(): assert len(record) == 2 assert ( - record[0].message.args[0] - == "The optimal value found for " - "dimension 0 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." + record[0].message.args[0] + == "The optimal value found for " + "dimension 0 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) assert ( - record[1].message.args[0] - == "The optimal value found for " - "dimension 1 of parameter " - "length_scale is close to the " - "specified lower bound 10.0. " - "Decreasing the bound and calling " - "fit again may find a better value." 
+ record[1].message.args[0] + == "The optimal value found for " + "dimension 1 of parameter " + "length_scale is close to the " + "specified lower bound 10.0. " + "Decreasing the bound and calling " + "fit again may find a better value." ) @@ -620,12 +620,12 @@ def test_gpr_consistency_std_cov_non_invertible_kernel(): ({"kernel": RBF(), "optimizer": "unknown"}, ValueError, "Unknown optimizer"), ({"alpha": np.zeros(100)}, ValueError, "alpha must be a scalar or an array"), ( - { - "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), - "n_restarts_optimizer": 2, - }, - ValueError, - "requires that all bounds are finite", + { + "kernel": WhiteKernel(noise_level_bounds=(-np.inf, np.inf)), + "n_restarts_optimizer": 2, + }, + ValueError, + "requires that all bounds are finite", ), ], ) From da31226d868149f733aee1083bea83ca5173fa58 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 19 Oct 2021 14:23:53 +0200 Subject: [PATCH 19/21] fix changelog --- doc/whats_new/v1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 6cd3a900a433a..d9686ac26af7a 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -50,6 +50,7 @@ Fixed models :class:`sklearn.gaussian_process.GaussianProcessRegressor` allowing proper normalization in multi-target scene. :pr:`20761` by :user:`Patrick de C. T. R. Ferreira `. + :mod:`sklearn.linear_model` ........................... 
From d0a1a7495f0815f8f98606ba8c1d495871baf11d Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Wed, 20 Oct 2021 18:07:34 +0200 Subject: [PATCH 20/21] y_cov as well --- doc/whats_new/v1.0.rst | 1 + sklearn/gaussian_process/_gpr.py | 13 +++++++++---- sklearn/gaussian_process/tests/test_gpr.py | 4 +++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index d27996d943ebc..c27565bc3968d 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -50,6 +50,7 @@ Fixed models :class:`sklearn.gaussian_process.GaussianProcessRegressor` allowing proper normalization in multi-target scene. :pr:`20761` by :user:`Patrick de C. T. R. Ferreira `. + :mod:`sklearn.feature_extraction` ................................. diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 90b313d7b1fcf..2d3498048c6db 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -403,7 +403,12 @@ def predict(self, X, return_std=False, return_cov=False): y_cov = self.kernel_(X) - V.T @ V # undo normalisation - y_cov = y_cov * self._y_train_std ** 2 + y_cov = np.outer(y_cov, self._y_train_std ** 2).reshape(*y_cov.shape, -1) + + # if y_cov has shape (n_samples, n_samples, 1), reshape to + # (n_samples, n_samples) + if y_cov.shape[2] == 1: + y_cov = np.squeeze(y_cov, axis=2) return y_mean, y_cov elif return_std: @@ -424,11 +429,11 @@ def predict(self, X, return_std=False, return_cov=False): y_var[y_var_negative] = 0.0 # undo normalisation - y_var = np.outer(y_var, self._y_train_std ** 2) + y_var = np.outer(y_var, self._y_train_std ** 2).reshape(*y_var.shape, -1) # if y_var has shape (n_samples, 1), reshape to (n_samples,) - if y_var.shape[0] == y_var.size: - y_var = np.ravel(y_var) + if y_var.shape[1] == 1: + y_var = np.squeeze(y_var, axis=1) return y_mean, np.sqrt(y_var) else: diff --git a/sklearn/gaussian_process/tests/test_gpr.py 
b/sklearn/gaussian_process/tests/test_gpr.py index 8b3bc0dccd534..8e57865600987 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -655,7 +655,7 @@ def test_gpr_predict_error(): def test_y_std_with_multitarget_normalized(): - """Check the proper normalization of `y_std` in multi-target scene. + """Check the proper normalization of `y_std` and `y_cov` in multi-target scene. Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/17394 @@ -677,6 +677,8 @@ def test_y_std_with_multitarget_normalized(): ) model.fit(X_train, y_train) y_pred, y_std = model.predict(X_test, return_std=True) + _, y_cov = model.predict(X_test, return_cov=True) assert y_pred.shape == (n_samples, n_targets) assert y_std.shape == (n_samples, n_targets) + assert y_cov.shape == (n_samples, n_samples, n_targets) From d52a00ef22422099e4622ac2a41da1ee4416c95b Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Wed, 20 Oct 2021 18:13:17 +0200 Subject: [PATCH 21/21] cln --- sklearn/gaussian_process/_gpr.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 2d3498048c6db..715cd2d0b16bd 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -353,7 +353,8 @@ def predict(self, X, return_std=False, return_cov=False): Standard deviation of predictive distribution at query points. Only returned when `return_std` is True. - y_cov : ndarray of shape (n_samples, n_samples), optional + y_cov : ndarray of shape (n_samples, n_samples) or \ + (n_samples, n_samples, n_targets), optional Covariance of joint predictive distribution a query points. Only returned when `return_cov` is True. 
""" @@ -403,7 +404,9 @@ def predict(self, X, return_std=False, return_cov=False): y_cov = self.kernel_(X) - V.T @ V # undo normalisation - y_cov = np.outer(y_cov, self._y_train_std ** 2).reshape(*y_cov.shape, -1) + y_cov = np.outer(y_cov, self._y_train_std ** 2).reshape( + *y_cov.shape, -1 + ) # if y_cov has shape (n_samples, n_samples, 1), reshape to # (n_samples, n_samples) @@ -429,7 +432,9 @@ def predict(self, X, return_std=False, return_cov=False): y_var[y_var_negative] = 0.0 # undo normalisation - y_var = np.outer(y_var, self._y_train_std ** 2).reshape(*y_var.shape, -1) + y_var = np.outer(y_var, self._y_train_std ** 2).reshape( + *y_var.shape, -1 + ) # if y_var has shape (n_samples, 1), reshape to (n_samples,) if y_var.shape[1] == 1: