From 6fe269b41a988ef1d3353ff2dccd79ef9b713547 Mon Sep 17 00:00:00 2001 From: ominusliticus Date: Wed, 8 Mar 2023 23:45:49 -0500 Subject: [PATCH] Gaussian Process: change default behavior of `sample_y` The `random_state` argument was set to `0`, forcing `sample_y` to always return the same Gaussian process samples. This is contrary to the expected behavior most people have from working with random number generating libraries such as numpy and scipy. Changing the default parameter to `None` will make the behavior of `sample_y` conform to the usual expectation of a user. Updated `test_gpr.py` so that all calls to `sample_y` are passed `random_state=0` to ensure deterministic testing. --- doc/whats_new/v1.3.rst | 7 ++++++ sklearn/gaussian_process/_gpr.py | 16 +++++++++++++- sklearn/gaussian_process/tests/test_gpr.py | 25 ++++++++++++++++++---- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 439b348ce2610..33182da74a35e 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -236,6 +236,13 @@ Changelog - |Enhancement| Added the parameter `fill_value` to :class:`impute.IterativeImputer`. :pr:`25232` by :user:`Thijs van Weezel `. +:mod:`sklearn.gaussian_process` +................................ + +- |API| The default value for `random_state` in + :meth:`GaussianProcessRegressor.sample_y` will change from `0` to `None` in v1.5. + :pr:`25789` by :user:`Kevin Ingles `. + :mod:`sklearn.inspection` ......................... diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index c0a8dc71b7352..420b1041d30f8 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -461,7 +461,7 @@ def predict(self, X, return_std=False, return_cov=False): else: return y_mean - def sample_y(self, X, n_samples=1, random_state=0): + def sample_y(self, X, n_samples=1, random_state="warn"): """Draw samples from Gaussian process and evaluate at X. 
Parameters @@ -476,6 +476,11 @@ def sample_y(self, X, n_samples=1, random_state=0): Determines random number generation to randomly draw samples. Pass an int for reproducible results across multiple function calls. + + .. versionchanged:: 1.5 + + The default value of `random_state` will change to `None` in v1.5. + See :term:`Glossary `. Returns @@ -485,6 +490,15 @@ def sample_y(self, X, n_samples=1, random_state=0): Values of n_samples samples drawn from Gaussian process and evaluated at query points. """ + # TODO(1.5): Default updates to `None` + if random_state == "warn": + warnings.warn( + "The default value of random_state will change from" + + " 0 to None in 1.5", + FutureWarning, + ) + random_state = 0 + rng = check_random_state(random_state) y_mean, y_cov = self.predict(X, return_cov=True) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index c03778958a3ad..9a35219a41163 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -174,7 +174,7 @@ def test_sample_statistics(kernel): y_mean, y_cov = gpr.predict(X2, return_cov=True) - samples = gpr.sample_y(X2, 300000) + samples = gpr.sample_y(X2, 300000, random_state=0) # More digits accuracy would require many more samples assert_almost_equal(y_mean, np.mean(samples, 1), 1) @@ -368,8 +368,8 @@ def test_y_multioutput(): assert_almost_equal(y_std_1d, y_std_2d[..., target]) assert_almost_equal(y_cov_1d, y_cov_2d[..., target]) - y_sample_1d = gpr.sample_y(X2, n_samples=10) - y_sample_2d = gpr_2d.sample_y(X2, n_samples=10) + y_sample_1d = gpr.sample_y(X2, n_samples=10, random_state=0) + y_sample_2d = gpr_2d.sample_y(X2, n_samples=10, random_state=0) assert y_sample_1d.shape == (5, 10) assert y_sample_2d.shape == (5, 2, 10) @@ -769,10 +769,27 @@ def test_sample_y_shapes(normalize_y, n_targets): model.fit(X_train, y_train) - y_samples = model.sample_y(X_test, n_samples=n_samples_y_test) + y_samples = 
model.sample_y(X_test, n_samples=n_samples_y_test, random_state=0) assert y_samples.shape == y_test_shape +# TODO(1.5): Remove test when default changes +def test_sample_y_emit_warning(): + """Check that sample_y raises a warning if it uses the default parameter. + + This is a test in association with issue: + https://github.com/scikit-learn/scikit-learn/issues/25767 + """ + kernel = RBF(length_scale=1) + gpr = GaussianProcessRegressor(kernel=kernel) + + warning_message = ( + "The default value of random_state will change from 0 to None in 1.5" + ) + with pytest.warns(FutureWarning, match=warning_message): + _ = gpr.sample_y(X) + + class CustomKernel(C): """ A custom kernel that has a diag method that returns the first column of the