From 0bf6c06b67ea6cc5823a8f6fe776a6cb75e0207e Mon Sep 17 00:00:00 2001
From: Arturo Soberon
Date: Thu, 27 Feb 2025 17:01:10 -0600
Subject: [PATCH 1/4] Replaced all hardcoded random seeds with
 global_random_seed

---
 sklearn/linear_model/tests/test_huber.py | 55 +++++++++++++-----------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index 9c0c7d213ee27..5d862540c56c2 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -16,11 +16,12 @@
 from sklearn.utils.fixes import CSR_CONTAINERS
 
 
-def make_regression_with_outliers(n_samples=50, n_features=20):
-    rng = np.random.RandomState(0)
+def make_regression_with_outliers(global_random_seed, n_samples=50, n_features=20):
+    rng = np.random.RandomState(global_random_seed)
     # Generate data with outliers by replacing 10% of the samples with noise.
     X, y = make_regression(
-        n_samples=n_samples, n_features=n_features, random_state=0, noise=0.05
+        n_samples=n_samples, n_features=n_features, random_state=global_random_seed,
+        noise=0.05
     )
 
     # Replace 10% of the sample with noise.
@@ -30,9 +31,9 @@ def make_regression_with_outliers(n_samples=50, n_features=20):
     return X, y
 
 
-def test_huber_equals_lr_for_high_epsilon():
+def test_huber_equals_lr_for_high_epsilon(global_random_seed):
     # Test that Ridge matches LinearRegression for large epsilon
-    X, y = make_regression_with_outliers()
+    X, y = make_regression_with_outliers(global_random_seed)
     lr = LinearRegression()
     lr.fit(X, y)
     huber = HuberRegressor(epsilon=1e3, alpha=0.0)
@@ -41,17 +42,17 @@ def test_huber_equals_lr_for_high_epsilon():
     assert_almost_equal(huber.intercept_, lr.intercept_, 2)
 
 
-def test_huber_max_iter():
-    X, y = make_regression_with_outliers()
+def test_huber_max_iter(global_random_seed):
+    X, y = make_regression_with_outliers(global_random_seed)
     huber = HuberRegressor(max_iter=1)
     huber.fit(X, y)
     assert huber.n_iter_ == huber.max_iter
 
 
-def test_huber_gradient():
+def test_huber_gradient(global_random_seed):
     # Test that the gradient calculated by _huber_loss_and_gradient is correct
-    rng = np.random.RandomState(1)
-    X, y = make_regression_with_outliers()
+    rng = np.random.RandomState(global_random_seed)
+    X, y = make_regression_with_outliers(global_random_seed)
     sample_weight = rng.randint(1, 3, (y.shape[0]))
 
     def loss_func(x, *args):
@@ -73,10 +74,10 @@ def grad_func(x, *args):
 
 
 @pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
-def test_huber_sample_weights(csr_container):
+def test_huber_sample_weights(csr_container, global_random_seed):
     # Test sample_weights implementation in HuberRegressor"""
 
-    X, y = make_regression_with_outliers()
+    X, y = make_regression_with_outliers(global_random_seed)
     huber = HuberRegressor()
     huber.fit(X, y)
     huber_coef = huber.coef_
@@ -92,7 +93,7 @@
     assert_array_almost_equal(huber.coef_ / scale, huber_coef / scale)
     assert_array_almost_equal(huber.intercept_ / scale, huber_intercept / scale)
 
-    X, y = make_regression_with_outliers(n_samples=5, n_features=20)
+    X, y = make_regression_with_outliers(global_random_seed, n_samples=5, n_features=20)
     X_new = np.vstack((X, np.vstack((X[1], X[1], X[3]))))
     y_new = np.concatenate((y, [y[1]], [y[1]], [y[3]]))
     huber.fit(X_new, y_new)
@@ -114,8 +115,8 @@
 
 
 @pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
-def test_huber_sparse(csr_container):
-    X, y = make_regression_with_outliers()
+def test_huber_sparse(csr_container, global_random_seed):
+    X, y = make_regression_with_outliers(global_random_seed)
     huber = HuberRegressor(alpha=0.1)
     huber.fit(X, y)
 
@@ -126,9 +127,9 @@ def test_huber_sparse(csr_container):
     assert_array_equal(huber.outliers_, huber_sparse.outliers_)
 
 
-def test_huber_scaling_invariant():
+def test_huber_scaling_invariant(global_random_seed):
     # Test that outliers filtering is scaling independent.
-    X, y = make_regression_with_outliers()
+    X, y = make_regression_with_outliers(global_random_seed)
     huber = HuberRegressor(fit_intercept=False, alpha=0.0)
     huber.fit(X, y)
     n_outliers_mask_1 = huber.outliers_
@@ -143,10 +144,10 @@ def test_huber_scaling_invariant():
     assert_array_equal(n_outliers_mask_3, n_outliers_mask_1)
 
 
-def test_huber_and_sgd_same_results():
+def test_huber_and_sgd_same_results(global_random_seed):
     # Test they should converge to same coefficients for same parameters
 
-    X, y = make_regression_with_outliers(n_samples=10, n_features=2)
+    X, y = make_regression_with_outliers(global_random_seed, n_samples=10, n_features=2)
 
     # Fit once to find out the scale parameter. Scale down X and y by scale
     # so that the scale parameter is optimized to 1.0
@@ -161,7 +162,7 @@ def test_huber_and_sgd_same_results():
         alpha=0.0,
         loss="huber",
         shuffle=True,
-        random_state=0,
+        random_state=global_random_seed,
         max_iter=10000,
         fit_intercept=False,
         epsilon=1.35,
@@ -171,8 +172,8 @@
     assert_array_almost_equal(huber.coef_, sgdreg.coef_, 1)
 
 
-def test_huber_warm_start():
-    X, y = make_regression_with_outliers()
+def test_huber_warm_start(global_random_seed):
+    X, y = make_regression_with_outliers(global_random_seed)
     huber_warm = HuberRegressor(alpha=1.0, max_iter=10000, warm_start=True, tol=1e-1)
     huber_warm.fit(X, y)
 
@@ -186,9 +187,9 @@
     assert huber_warm.n_iter_ == 0
 
 
-def test_huber_better_r2_score():
+def test_huber_better_r2_score(global_random_seed):
     # Test that huber returns a better r2 score than non-outliers"""
-    X, y = make_regression_with_outliers()
+    X, y = make_regression_with_outliers(global_random_seed)
     huber = HuberRegressor(alpha=0.01)
     huber.fit(X, y)
     linear_loss = np.dot(X, huber.coef_) + huber.intercept_ - y
@@ -209,8 +210,10 @@ def test_huber_better_r2_score():
     assert ridge_outlier_score > huber_outlier_score
 
 
-def test_huber_bool():
+def test_huber_bool(global_random_seed):
     # Test that it does not crash with bool data
-    X, y = make_regression(n_samples=200, n_features=2, noise=4.0, random_state=0)
+    X, y = make_regression(
+        n_samples=200, n_features=2, noise=4.0, random_state=global_random_seed
+    )
     X_bool = X > 0
     HuberRegressor().fit(X_bool, y)

From cd151cfd0f40d23a588675a8ee5a38dce020571a Mon Sep 17 00:00:00 2001
From: Arturo Soberon
Date: Thu, 27 Feb 2025 17:07:20 -0600
Subject: [PATCH 2/4] Black compliant file

---
 sklearn/linear_model/tests/test_huber.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index 5d862540c56c2..bc132e88527f3 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -20,8 +20,10 @@ def make_regression_with_outliers(global_random_seed, n_samples=50, n_features=2
     rng = np.random.RandomState(global_random_seed)
     # Generate data with outliers by replacing 10% of the samples with noise.
     X, y = make_regression(
-        n_samples=n_samples, n_features=n_features, random_state=global_random_seed,
-        noise=0.05
+        n_samples=n_samples,
+        n_features=n_features,
+        random_state=global_random_seed,
+        noise=0.05,
     )
 
     # Replace 10% of the sample with noise.

From b404b02d8ca2306c083a53515e26da199a8f8f64 Mon Sep 17 00:00:00 2001
From: Arturo Soberon
Date: Thu, 27 Feb 2025 17:50:41 -0600
Subject: [PATCH 3/4] empty commit to trigger all CI jobs

[all random seeds]
test_parallel

From f8e8f113832338f3d0f132fa7d7e53ea47951582 Mon Sep 17 00:00:00 2001
From: Arturo Soberon
Date: Mon, 3 Mar 2025 09:28:31 -0600
Subject: [PATCH 4/4] Empty commit to trigger CI jobs - test_huber

[all random seeds]
- make_regression_with_outliers
- test_huber_equals_lr_for_high_epsilon
- test_huber_max_iter
- test_huber_gradient
- test_huber_sample_weights
- test_huber_sparse
- test_huber_scaling_invariant
- test_huber_and_sgd_same_results
- test_huber_warm_start
- test_huber_better_r2_score
- test_huber_bool
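
Note on the fixture these patches rely on: global_random_seed is supplied by scikit-learn's shared test configuration (sklearn/conftest.py). Each test that requests it is parametrized over one or more integer seeds selected through the SKLEARN_TESTS_GLOBAL_RANDOM_SEED environment variable, and the "[all random seeds]" marker in the commit messages above asks CI to run the listed tests across the full seed range. The snippet below is a minimal sketch of how such a fixture can be wired up in pytest; it only handles a single seed or an inclusive "start-stop" range and is an illustration of the mechanism, not scikit-learn's actual conftest code.

# conftest.py -- minimal sketch of a seed-parametrizing fixture, not
# scikit-learn's real implementation (see sklearn/conftest.py for that).
import os

import pytest


def _seeds_from_env():
    # Accept either a single seed ("42") or an inclusive range ("40-42").
    value = os.environ.get("SKLEARN_TESTS_GLOBAL_RANDOM_SEED", "42")
    if "-" in value:
        start, stop = value.split("-")
        return list(range(int(start), int(stop) + 1))
    return [int(value)]


@pytest.fixture(params=_seeds_from_env())
def global_random_seed(request):
    # Every test taking `global_random_seed` runs once per selected seed,
    # producing test IDs such as test_huber_max_iter[40], [41], ...
    return request.param

With a fixture like this in place, a run such as SKLEARN_TESTS_GLOBAL_RANDOM_SEED="0-99" pytest sklearn/linear_model/tests/test_huber.py exercises every test in the patched module once per seed, which is what the two trailing empty commits request from CI.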