Skip to content

TST Use global_random_seed in sklearn/datasets/tests/test_samples_generator.py #31181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 5 commits on Apr 11, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 34 additions & 27 deletions sklearn/datasets/tests/test_samples_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,20 +344,20 @@ def test_make_hastie_10_2():
assert np.unique(y).shape == (2,), "Unexpected number of classes"


def test_make_regression():
def test_make_regression(global_random_seed):
X, y, c = make_regression(
n_samples=100,
n_samples=200,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's okay to increase the size of the dataset here: the test checks the variance of the generated data, so it needs enough data points for that estimate to be stable across random seeds.

n_features=10,
n_informative=3,
effective_rank=5,
coef=True,
bias=0.0,
noise=1.0,
random_state=0,
random_state=global_random_seed,
)

assert X.shape == (100, 10), "X shape mismatch"
assert y.shape == (100,), "y shape mismatch"
assert X.shape == (200, 10), "X shape mismatch"
assert y.shape == (200,), "y shape mismatch"
assert c.shape == (10,), "coef shape mismatch"
assert sum(c != 0.0) == 3, "Unexpected number of informative features"

Expand All @@ -369,15 +369,15 @@ def test_make_regression():
assert X.shape == (100, 1)


def test_make_regression_multitarget():
def test_make_regression_multitarget(global_random_seed):
X, y, c = make_regression(
n_samples=100,
n_features=10,
n_informative=3,
n_targets=3,
coef=True,
noise=1.0,
random_state=0,
random_state=global_random_seed,
)

assert X.shape == (100, 10), "X shape mismatch"
Expand All @@ -389,11 +389,11 @@ def test_make_regression_multitarget():
assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1)


def test_make_blobs():
def test_make_blobs(global_random_seed):
cluster_stds = np.array([0.05, 0.2, 0.4])
cluster_centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
X, y = make_blobs(
random_state=0,
random_state=global_random_seed,
n_samples=50,
n_features=2,
centers=cluster_centers,
Expand All @@ -417,12 +417,15 @@ def test_make_blobs_n_samples_list():
), "Incorrect number of samples per blob"


def test_make_blobs_n_samples_list_with_centers():
def test_make_blobs_n_samples_list_with_centers(global_random_seed):
n_samples = [20, 20, 20]
centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
cluster_stds = np.array([0.05, 0.2, 0.4])
X, y = make_blobs(
n_samples=n_samples, centers=centers, cluster_std=cluster_stds, random_state=0
n_samples=n_samples,
centers=centers,
cluster_std=cluster_stds,
random_state=global_random_seed,
)

assert X.shape == (sum(n_samples), 2), "X shape mismatch"
Expand Down Expand Up @@ -479,8 +482,10 @@ def test_make_blobs_error():
make_blobs(n_samples, centers=3)


def test_make_friedman1():
X, y = make_friedman1(n_samples=5, n_features=10, noise=0.0, random_state=0)
def test_make_friedman1(global_random_seed):
X, y = make_friedman1(
n_samples=5, n_features=10, noise=0.0, random_state=global_random_seed
)

assert X.shape == (5, 10), "X shape mismatch"
assert y.shape == (5,), "y shape mismatch"
Expand All @@ -494,8 +499,8 @@ def test_make_friedman1():
)


def test_make_friedman2():
X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0)
def test_make_friedman2(global_random_seed):
X, y = make_friedman2(n_samples=5, noise=0.0, random_state=global_random_seed)

assert X.shape == (5, 4), "X shape mismatch"
assert y.shape == (5,), "y shape mismatch"
Expand All @@ -505,8 +510,8 @@ def test_make_friedman2():
)


def test_make_friedman3():
X, y = make_friedman3(n_samples=5, noise=0.0, random_state=0)
def test_make_friedman3(global_random_seed):
X, y = make_friedman3(n_samples=5, noise=0.0, random_state=global_random_seed)

assert X.shape == (5, 4), "X shape mismatch"
assert y.shape == (5,), "y shape mismatch"
Expand All @@ -533,13 +538,13 @@ def test_make_low_rank_matrix():
assert sum(s) - 5 < 0.1, "X rank is not approximately 5"


def test_make_sparse_coded_signal():
def test_make_sparse_coded_signal(global_random_seed):
Y, D, X = make_sparse_coded_signal(
n_samples=5,
n_components=8,
n_features=10,
n_nonzero_coefs=3,
random_state=0,
random_state=global_random_seed,
)
assert Y.shape == (5, 10), "Y shape mismatch"
assert D.shape == (8, 10), "D shape mismatch"
Expand All @@ -557,8 +562,8 @@ def test_make_sparse_uncorrelated():
assert y.shape == (5,), "y shape mismatch"


def test_make_spd_matrix():
X = make_spd_matrix(n_dim=5, random_state=0)
def test_make_spd_matrix(global_random_seed):
X = make_spd_matrix(n_dim=5, random_state=global_random_seed)

assert X.shape == (5, 5), "X shape mismatch"
assert_array_almost_equal(X, X.T)
Expand Down Expand Up @@ -604,17 +609,19 @@ def test_make_sparse_spd_matrix(norm_diag, sparse_format, global_random_seed):


@pytest.mark.parametrize("hole", [False, True])
def test_make_swiss_roll(hole):
X, t = make_swiss_roll(n_samples=5, noise=0.0, random_state=0, hole=hole)
def test_make_swiss_roll(global_random_seed, hole):
X, t = make_swiss_roll(
n_samples=5, noise=0.0, random_state=global_random_seed, hole=hole
)

assert X.shape == (5, 3)
assert t.shape == (5,)
assert_array_almost_equal(X[:, 0], t * np.cos(t))
assert_array_almost_equal(X[:, 2], t * np.sin(t))


def test_make_s_curve():
X, t = make_s_curve(n_samples=5, noise=0.0, random_state=0)
def test_make_s_curve(global_random_seed):
X, t = make_s_curve(n_samples=5, noise=0.0, random_state=global_random_seed)

assert X.shape == (5, 3), "X shape mismatch"
assert t.shape == (5,), "t shape mismatch"
Expand Down Expand Up @@ -669,8 +676,8 @@ def test_make_checkerboard():
assert_array_almost_equal(X1, X2)


def test_make_moons():
X, y = make_moons(3, shuffle=False)
def test_make_moons(global_random_seed):
X, y = make_moons(3, shuffle=False, random_state=global_random_seed)
for x, label in zip(X, y):
center = [0.0, 0.0] if label == 0 else [1.0, 0.5]
dist_sqr = ((x - center) ** 2).sum()
Expand Down