Skip to content

TST add global_random_seed fixture to sklearn/covariance/tests/test_robust_covariance.py #25821

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Mar 14, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions sklearn/covariance/tests/test_robust_covariance.py
Original file line number | Diff line number | Diff line change
@@ -20,24 +20,24 @@
n_samples, n_features = X.shape


def test_mcd():
def test_mcd(global_random_seed):
# Tests the FastMCD algorithm implementation
# Small data set
# test without outliers (random independent normal data)
launch_mcd_on_dataset(100, 5, 0, 0.01, 0.1, 80)
launch_mcd_on_dataset(100, 5, 0, 0.02, 0.1, 75, global_random_seed)
# test with a contaminated data set (medium contamination)
launch_mcd_on_dataset(100, 5, 20, 0.01, 0.01, 70)
launch_mcd_on_dataset(100, 5, 20, 0.3, 0.3, 65, global_random_seed)
# test with a contaminated data set (strong contamination)
launch_mcd_on_dataset(100, 5, 40, 0.1, 0.1, 50)
launch_mcd_on_dataset(100, 5, 40, 0.1, 0.1, 50, global_random_seed)

# Medium data set
launch_mcd_on_dataset(1000, 5, 450, 0.1, 0.1, 540)
launch_mcd_on_dataset(1000, 5, 450, 0.1, 0.1, 540, global_random_seed)

# Large data set
launch_mcd_on_dataset(1700, 5, 800, 0.1, 0.1, 870)
launch_mcd_on_dataset(1700, 5, 800, 0.1, 0.1, 870, global_random_seed)

# 1D data set
launch_mcd_on_dataset(500, 1, 100, 0.001, 0.001, 350)
launch_mcd_on_dataset(500, 1, 100, 0.02, 0.02, 350, global_random_seed)


def test_fast_mcd_on_invalid_input():
@@ -56,10 +56,10 @@ def test_mcd_class_on_invalid_input():


def launch_mcd_on_dataset(
n_samples, n_features, n_outliers, tol_loc, tol_cov, tol_support
n_samples, n_features, n_outliers, tol_loc, tol_cov, tol_support, seed
):

rand_gen = np.random.RandomState(0)
rand_gen = np.random.RandomState(seed)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me try to use the global seed here as well

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Looks successful.

data = rand_gen.randn(n_samples, n_features)
# add some outliers
outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
@@ -70,7 +70,7 @@ def launch_mcd_on_dataset(

pure_data = data[inliers_mask]
# compute MCD by fitting an object
mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
mcd_fit = MinCovDet(random_state=seed).fit(data)
T = mcd_fit.location_
S = mcd_fit.covariance_
H = mcd_fit.support_
@@ -92,10 +92,10 @@ def test_mcd_issue1127():
mcd.fit(X)


def test_mcd_issue3367():
def test_mcd_issue3367(global_random_seed):
# Check that MCD completes when the covariance matrix is singular
# i.e. one of the rows and columns are all zeros
rand_gen = np.random.RandomState(0)
rand_gen = np.random.RandomState(global_random_seed)

# Think of these as the values for X and Y -> 10 values between -5 and 5
data_values = np.linspace(-5, 5, 10).tolist()
@@ -140,7 +140,7 @@ def test_mcd_support_covariance_is_zero():
MinCovDet().fit(X)


def test_mcd_increasing_det_warning():
def test_mcd_increasing_det_warning(global_random_seed):
# Check that a warning is raised if we observe increasing determinants
# during the c_step. In theory the sequence of determinants should be
# decreasing. Increasing determinants are likely due to ill-conditioned
@@ -168,7 +168,7 @@ def test_mcd_increasing_det_warning():
[5.2, 3.5, 1.5, 0.2],
]

mcd = MinCovDet(random_state=1)
mcd = MinCovDet(support_fraction=0.5, random_state=global_random_seed)
warn_msg = "Determinant has increased"
with pytest.warns(RuntimeWarning, match=warn_msg):
mcd.fit(X)