From dba519d10d1e4cc41f1b8aaafddd22a45a721f46 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:17:30 +0530 Subject: [PATCH 01/13] add global_random_seed to test_matthews_corrcoef_against_numpy_corrcoef --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 21e2eed9b53cc..65ab47a81f0ba 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -970,8 +970,8 @@ def test_zero_division_nan_warning(metric, y_true, y_pred): assert result == 0.0 -def test_matthews_corrcoef_against_numpy_corrcoef(): - rng = np.random.RandomState(0) +def test_matthews_corrcoef_against_numpy_corrcoef(global_random_seed): + rng = np.random.RandomState(global_random_seed) y_true = rng.randint(0, 2, size=20) y_pred = rng.randint(0, 2, size=20) From f6137f7ea1501f2cab6ad2a9a45edd344bc8673e Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:22:56 +0530 Subject: [PATCH 02/13] add global_random_seed to test_matthews_corrcoef_against_jurman --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 65ab47a81f0ba..3b6d75687badd 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -980,11 +980,11 @@ def test_matthews_corrcoef_against_numpy_corrcoef(global_random_seed): ) -def test_matthews_corrcoef_against_jurman(): +def test_matthews_corrcoef_against_jurman(global_random_seed): # Check that the multiclass matthews_corrcoef agrees with the definition # presented in Jurman, Riccadonna, Furlanello, (2012). A Comparison of MCC # and CEN Error Measures in MultiClass Prediction - rng = np.random.RandomState(0) + rng = np.random.RandomState(global_random_seed) y_true = rng.randint(0, 2, size=20) y_pred = rng.randint(0, 2, size=20) sample_weight = rng.rand(20) From 81740a3d430c42ff303e84fc3b16bd3a5a2d6580 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:24:27 +0530 Subject: [PATCH 03/13] add global_random_seed to test_matthews_corrcoef --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 3b6d75687badd..19748b5f6c749 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1019,8 +1019,8 @@ def test_matthews_corrcoef_against_jurman(global_random_seed): assert_almost_equal(mcc_ours, mcc_jurman, 10) -def test_matthews_corrcoef(): - rng = np.random.RandomState(0) +def test_matthews_corrcoef(global_random_seed): + rng = np.random.RandomState(global_random_seed) y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)] # corrcoef of same vectors must be 1 From d8b25844303240dedc073df08ce86ea6d0bb4c00 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:33:31 +0530 Subject: [PATCH 04/13] add global_random_seed to test_matthews_corrcoef_multiclass --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 19748b5f6c749..0e557a00f7c28 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1054,8 +1054,8 @@ def test_matthews_corrcoef(global_random_seed): assert_almost_equal(matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.0) -def test_matthews_corrcoef_multiclass(): - rng = np.random.RandomState(0) +def test_matthews_corrcoef_multiclass(global_random_seed): + rng = np.random.RandomState(global_random_seed) ord_a = ord("a") n_classes = 4 y_true = [chr(ord_a + i) for i in rng.randint(0, n_classes, size=20)] From 2e14c8f134d2a3abcbe5b50d40f9819186aa0240 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:34:54 +0530 Subject: [PATCH 05/13] add global_random_seed to test_matthews_corrcoef_overflow --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 0e557a00f7c28..b67c91737960c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1111,9 +1111,9 @@ def test_matthews_corrcoef_multiclass(global_random_seed): @pytest.mark.parametrize("n_points", [100, 10000]) -def test_matthews_corrcoef_overflow(n_points): +def test_matthews_corrcoef_overflow(n_points, global_random_seed): # https://github.com/scikit-learn/scikit-learn/issues/9622 - rng = np.random.RandomState(20170906) + rng = np.random.RandomState(global_random_seed) def mcc_safe(y_true, y_pred): conf_matrix = confusion_matrix(y_true, y_pred) From 459de8713d29e4c97b9f56c9018c1d44f95c1ec2 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:36:36 +0530 Subject: [PATCH 06/13] add global_random_seed to test_classification_metric_pos_label_types --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index b67c91737960c..1bd6e4e5c94dc 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2946,13 +2946,13 @@ def test_balanced_accuracy_score(y_true, y_pred): @pytest.mark.parametrize( "classes", [(False, True), (0, 1), (0.0, 1.0), ("zero", "one")] ) -def test_classification_metric_pos_label_types(metric, classes): +def test_classification_metric_pos_label_types(metric, classes, global_random_seed): """Check that the metric works with different types of `pos_label`. We can expect `pos_label` to be a bool, an integer, a float, a string. No error should be raised for those types. """ - rng = np.random.RandomState(42) + rng = np.random.RandomState(global_random_seed) n_samples, pos_label = 10, classes[-1] y_true = rng.choice(classes, size=n_samples, replace=True) if metric is brier_score_loss: From af92257bceb7d5a7f383844e4448c0160cc7c763 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:38:53 +0530 Subject: [PATCH 07/13] add global_random_seed to make_prediction --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 1bd6e4e5c94dc..b4ed79cc24456 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -55,7 +55,7 @@ # Utilities for testing -def make_prediction(dataset=None, binary=False): +def make_prediction(global_random_seed, dataset=None, binary=False): """Make some classification predictions on a toy dataset using a SVC If binary is True restrict to a binary classification problem instead of a @@ -82,7 +82,7 @@ def make_prediction(dataset=None, binary=False): half = int(n_samples / 2) # add noisy features to make the problem harder and avoid perfect results - rng = np.random.RandomState(0) + rng = np.random.RandomState(global_random_seed) X = np.c_[X, rng.randn(n_samples, 200 * n_features)] # run classifier, get class probabilities and label predictions From 734ab6c685051e7bf301f69887a332fac92c2713 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:41:29 +0530 Subject: [PATCH 08/13] Undo add global_random_seed to make_prediction --- sklearn/metrics/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index b4ed79cc24456..1bd6e4e5c94dc 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -55,7 +55,7 @@ # Utilities for testing -def make_prediction(global_random_seed, dataset=None, binary=False): +def make_prediction(dataset=None, binary=False): """Make some classification predictions on a toy dataset using a SVC If binary is True restrict to a binary classification problem instead of a @@ -82,7 +82,7 @@ def make_prediction(global_random_seed, dataset=None, binary=False): half = int(n_samples / 2) # add noisy features to make the problem harder and avoid perfect results - rng = np.random.RandomState(global_random_seed) + rng = np.random.RandomState(0) X = np.c_[X, rng.randn(n_samples, 200 * n_features)] # run classifier, get class probabilities and label predictions From 282b3af6c4a3cd7ac23be240e896a1eaa3a78a73 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Tue, 18 Feb 2025 21:56:07 +0530 Subject: [PATCH 09/13] test_classification [all random seeds] test_matthews_corrcoef_against_numpy_corrcoef test_matthews_corrcoef_against_jurman test_matthews_corrcoef test_matthews_corrcoef_multiclass test_classification_metric_pos_label_types From 05f25e81316531819b462ef0549452aa166e197f Mon Sep 17 00:00:00 2001 From: Code_Blooded <90474550+Rishab260@users.noreply.github.com> Date: Sat, 22 Feb 2025 17:07:48 +0530 Subject: [PATCH 10/13] Update sklearn/metrics/tests/test_classification.py Co-authored-by: Xiao Yuan --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 1bd6e4e5c94dc..91a974a468316 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2952,7 +2952,7 @@ def test_classification_metric_pos_label_types(metric, classes, global_random_se We can expect `pos_label` to be a bool, an integer, a float, a string. No error should be raised for those types. """ - rng = np.random.RandomState(global_random_seed) + rng = np.random.RandomState(42) n_samples, pos_label = 10, classes[-1] y_true = rng.choice(classes, size=n_samples, replace=True) if metric is brier_score_loss: From f34240910fe284ac5bb45a8c75d1d8d842146a06 Mon Sep 17 00:00:00 2001 From: Code_Blooded <90474550+Rishab260@users.noreply.github.com> Date: Sat, 22 Feb 2025 17:08:34 +0530 Subject: [PATCH 11/13] Update sklearn/metrics/tests/test_classification.py Co-authored-by: Xiao Yuan --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 91a974a468316..b67c91737960c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2946,7 +2946,7 @@ def test_balanced_accuracy_score(y_true, y_pred): @pytest.mark.parametrize( "classes", [(False, True), (0, 1), (0.0, 1.0), ("zero", "one")] ) -def test_classification_metric_pos_label_types(metric, classes, global_random_seed): +def test_classification_metric_pos_label_types(metric, classes): """Check that the metric works with different types of `pos_label`. We can expect `pos_label` to be a bool, an integer, a float, a string. From 025b9c7eeaaa411eba337f0cda291908e116e5da Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Sun, 2 Mar 2025 13:24:37 +0530 Subject: [PATCH 12/13] test_classification [all random seeds] test_matthews_corrcoef_against_numpy_corrcoef test_matthews_corrcoef_against_jurman test_matthews_corrcoef test_matthews_corrcoef_multiclass test_matthews_corrcoef_overflow From 371d82de458a4393953cad26d88c0e87176989c2 Mon Sep 17 00:00:00 2001 From: "rishab260@hotmail.com" Date: Sun, 2 Mar 2025 13:34:46 +0530 Subject: [PATCH 13/13] test_classification [all random seeds] test_matthews_corrcoef_against_numpy_corrcoef test_matthews_corrcoef_against_jurman test_matthews_corrcoef test_matthews_corrcoef_multiclass test_matthews_corrcoef_overflow