TST use global_random_seed in sklearn/metrics/tests/test_regression.py #30865

Merged (7 commits, Apr 11, 2025)
40 changes: 23 additions & 17 deletions sklearn/metrics/tests/test_regression.py
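For context, global_random_seed is a pytest fixture provided by scikit-learn's test configuration: it yields one or more integer seeds, controlled by the SKLEARN_TESTS_GLOBAL_RANDOM_SEED environment variable, so that seed-sensitive tests can be re-run across many seeds. The sketch below is a simplified illustration of that idea, not the actual conftest.py implementation; the parsing rules and defaults shown are assumptions.

# Illustrative sketch only: a rough approximation of how a global_random_seed
# fixture can be wired up; scikit-learn's real conftest.py differs in detail.
import os

import numpy as np
import pytest


def _parse_seed_env(value):
    # "42" -> [42]; "0-9" -> [0, 1, ..., 9]; "any" -> one randomly drawn seed.
    if value == "any":
        return [np.random.randint(0, 100)]
    if "-" in value:
        start, stop = value.split("-")
        return list(range(int(start), int(stop) + 1))
    return [int(value)]


_SEEDS = _parse_seed_env(os.environ.get("SKLEARN_TESTS_GLOBAL_RANDOM_SEED", "42"))


@pytest.fixture(params=_SEEDS)
def global_random_seed(request):
    """Yield an integer seed so each test derives its RNG from it."""
    return request.param


# A test then requests the fixture and seeds a local RNG from it,
# which is the pattern the diff below introduces:
def test_example(global_random_seed):
    rng = np.random.RandomState(global_random_seed)
    assert rng.rand(10).shape == (10,)

With a fixture like this, running the suite with something like SKLEARN_TESTS_GLOBAL_RANDOM_SEED="0-9" exercises each seeded test once per seed, which is what motivates replacing the hard-coded seeds in the changes below.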
@@ -494,42 +494,44 @@ def test_regression_single_sample(metric):
assert np.isnan(score)


def test_tweedie_deviance_continuity():
def test_tweedie_deviance_continuity(global_random_seed):
n_samples = 100

y_true = np.random.RandomState(0).rand(n_samples) + 0.1
y_pred = np.random.RandomState(1).rand(n_samples) + 0.1
rng = np.random.RandomState(global_random_seed)

y_true = rng.rand(n_samples) + 0.1
y_pred = rng.rand(n_samples) + 0.1

assert_allclose(
mean_tweedie_deviance(y_true, y_pred, power=0 - 1e-10),
mean_tweedie_deviance(y_true, y_pred, power=0),
)

    # As we get closer to the limit, with 1e-12 difference the absolute
    # As we get closer to the limit, with 1e-12 difference the
# tolerance to pass the below check increases. There are likely
# numerical precision issues on the edges of different definition
# regions.
assert_allclose(
mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10),
mean_tweedie_deviance(y_true, y_pred, power=1),
atol=1e-6,
rtol=1e-5,
)

assert_allclose(
mean_tweedie_deviance(y_true, y_pred, power=2 - 1e-10),
mean_tweedie_deviance(y_true, y_pred, power=2),
atol=1e-6,
rtol=1e-5,
)

assert_allclose(
mean_tweedie_deviance(y_true, y_pred, power=2 + 1e-10),
mean_tweedie_deviance(y_true, y_pred, power=2),
atol=1e-6,
rtol=1e-5,
)


def test_mean_absolute_percentage_error():
random_number_generator = np.random.RandomState(42)
def test_mean_absolute_percentage_error(global_random_seed):
random_number_generator = np.random.RandomState(global_random_seed)
y_true = random_number_generator.exponential(size=100)
y_pred = 1.2 * y_true
assert mean_absolute_percentage_error(y_true, y_pred) == pytest.approx(0.2)
@@ -539,7 +541,9 @@ def test_mean_absolute_percentage_error():
"distribution", ["normal", "lognormal", "exponential", "uniform"]
)
@pytest.mark.parametrize("target_quantile", [0.05, 0.5, 0.75])
def test_mean_pinball_loss_on_constant_predictions(distribution, target_quantile):
def test_mean_pinball_loss_on_constant_predictions(
distribution, target_quantile, global_random_seed
):
if not hasattr(np, "quantile"):
pytest.skip(
"This test requires a more recent version of numpy "
@@ -548,7 +552,7 @@ def test_mean_pinball_loss_on_constant_predictions(distribution, target_quantile

# Check that the pinball loss is minimized by the empirical quantile.
n_samples = 3000
rng = np.random.RandomState(42)
rng = np.random.RandomState(global_random_seed)
data = getattr(rng, distribution)(size=n_samples)

# Compute the best possible pinball loss for any constant predictor:
@@ -582,20 +586,22 @@ def objective_func(x):
constant_pred = np.full(n_samples, fill_value=x)
return mean_pinball_loss(data, constant_pred, alpha=target_quantile)

result = optimize.minimize(objective_func, data.mean(), method="Nelder-Mead")
result = optimize.minimize(objective_func, data.mean())
assert result.success
# The minimum is not unique with limited data, hence the large tolerance.
assert result.x == pytest.approx(best_pred, rel=1e-2)
# For the normal distribution and the 0.5 quantile, the expected result is close to
# 0, hence the additional use of absolute tolerance.
assert_allclose(result.x, best_pred, rtol=1e-1, atol=1e-3)
assert result.fun == pytest.approx(best_pbl)


def test_dummy_quantile_parameter_tuning():
def test_dummy_quantile_parameter_tuning(global_random_seed):
# Integration test to check that it is possible to use the pinball loss to
# tune the hyperparameter of a quantile regressor. This is conceptually
# similar to the previous test but using the scikit-learn estimator and
# scoring API instead.
n_samples = 1000
rng = np.random.RandomState(0)
rng = np.random.RandomState(global_random_seed)
X = rng.normal(size=(n_samples, 5)) # Ignored
y = rng.exponential(size=n_samples)

Expand All @@ -616,9 +622,9 @@ def test_dummy_quantile_parameter_tuning():
assert grid_search.best_params_["quantile"] == pytest.approx(alpha)


def test_pinball_loss_relation_with_mae():
def test_pinball_loss_relation_with_mae(global_random_seed):
    # Test that mean_pinball_loss with alpha=0.5 is half of mean absolute error
rng = np.random.RandomState(714)
rng = np.random.RandomState(global_random_seed)
n = 100
y_true = rng.normal(size=n)
y_pred = y_true.copy() + rng.uniform(n)
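
As a quick sanity check of the relation asserted in test_pinball_loss_relation_with_mae: with alpha=0.5 the pinball loss reduces to half the absolute error, since 0.5 * max(d, 0) + 0.5 * max(-d, 0) == 0.5 * |d|. A minimal standalone check (seed and sizes here are chosen arbitrarily for illustration):

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_pinball_loss

rng = np.random.RandomState(0)
y_true = rng.normal(size=100)
y_pred = y_true + rng.uniform(size=100)

# mean_pinball_loss with alpha=0.5 averages 0.5 * |y_true - y_pred|,
# i.e. exactly half of the mean absolute error.
np.testing.assert_allclose(
    mean_pinball_loss(y_true, y_pred, alpha=0.5),
    0.5 * mean_absolute_error(y_true, y_pred),
)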