From 61b9e1fd6680dffea56fa824e380577e5868498b Mon Sep 17 00:00:00 2001 From: mohammed benyamna Date: Fri, 25 Apr 2025 19:33:46 +0100 Subject: [PATCH 1/5] Replace filtered data fixture with synthetic binary dataset Replaced the `data_binary` fixture that filtered classes from a multiclass dataset with a new fixture generating a synthetic binary classification dataset using `make_classification`. This ensures consistent data characteristics, introduces label noise, and better simulates real-world classification challenges. --- .../metrics/_plot/tests/test_roc_curve_display.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py index c2e6c865fa9a9..fb29f785428a8 100644 --- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py @@ -13,6 +13,7 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.utils import shuffle +from sklearn.datasets import make_classification @pytest.fixture(scope="module") @@ -26,8 +27,16 @@ def data(): @pytest.fixture(scope="module") def data_binary(data): - X, y = data - return X[y < 2], y[y < 2] + X, y = make_classification( + n_samples=200, + n_features=20, + n_informative=5, + n_redundant=2, + flip_y=0.1, # Add some label noise + class_sep=0.8, # Reduce separation for more overlap + random_state=42, + ) + return X, y @pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"]) From e299bf6636cb0e53bc59e6878bccc0f85495f5e0 Mon Sep 17 00:00:00 2001 From: mohammed benyamna Date: Fri, 25 Apr 2025 19:42:02 +0100 Subject: [PATCH 2/5] Update test_roc_curve_display.py --- sklearn/metrics/_plot/tests/test_roc_curve_display.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py 
b/sklearn/metrics/_plot/tests/test_roc_curve_display.py index fb29f785428a8..eea887b34d386 100644 --- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py @@ -5,15 +5,14 @@ from sklearn import clone from sklearn.compose import make_column_transformer -from sklearn.datasets import load_breast_cancer, load_iris +from sklearn.datasets import load_breast_cancer, load_iris, make_classification from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression from sklearn.metrics import RocCurveDisplay, auc, roc_curve from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler -from sklearn.utils import shuffle -from sklearn.datasets import make_classification +from sklearn.utils import shuffle @pytest.fixture(scope="module") From 7ab94303b86b19c2bfdda737ca58ac01580aa12f Mon Sep 17 00:00:00 2001 From: mohammed benyamna Date: Fri, 25 Apr 2025 19:48:32 +0100 Subject: [PATCH 3/5] Update test_roc_curve_display.py --- sklearn/metrics/_plot/tests/test_roc_curve_display.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py index eea887b34d386..f523f8f0c6d6a 100644 --- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py @@ -1,5 +1,5 @@ -import numpy as np import pytest +import numpy as np from numpy.testing import assert_allclose from scipy.integrate import trapezoid @@ -12,7 +12,7 @@ from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler -from sklearn.utils import shuffle +from sklearn.utils import shuffle @pytest.fixture(scope="module") From 4cfe688c3a62d7c6e69e543266ad19f36fd6747d Mon Sep 17 00:00:00 2001 From: mohammed 
benyamna Date: Fri, 25 Apr 2025 20:07:48 +0100 Subject: [PATCH 4/5] Replace filtered data fixture with synthetic binary dataset --- sklearn/metrics/_plot/tests/test_roc_curve_display.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py index f523f8f0c6d6a..68b8b748035fb 100644 --- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py @@ -1,5 +1,5 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from scipy.integrate import trapezoid From e8b1e45c2a1f3f71afac505ca42f5793cd60b680 Mon Sep 17 00:00:00 2001 From: mohammed benyamna Date: Mon, 28 Apr 2025 10:38:47 +0100 Subject: [PATCH 5/5] Update the data_binary fixture and delete the data() fixture --- .../_plot/tests/test_roc_curve_display.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py index 68b8b748035fb..324a35ba544f8 100644 --- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py @@ -1,11 +1,11 @@ import numpy as np import pytest from numpy.testing import assert_allclose -from scipy.integrate import trapezoid +from scipy.integrate import trapz as trapezoid from sklearn import clone from sklearn.compose import make_column_transformer -from sklearn.datasets import load_breast_cancer, load_iris, make_classification +from sklearn.datasets import load_breast_cancer, make_classification from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression from sklearn.metrics import RocCurveDisplay, auc, roc_curve @@ -16,23 +16,14 @@ @pytest.fixture(scope="module") -def data(): - X, y = load_iris(return_X_y=True) - # Avoid introducing test dependencies by mistake.
- X.flags.writeable = False - y.flags.writeable = False - return X, y - - -@pytest.fixture(scope="module") -def data_binary(data): +def data_binary(): X, y = make_classification( n_samples=200, n_features=20, n_informative=5, n_redundant=2, - flip_y=0.1, # Add some label noise - class_sep=0.8, # Reduce separation for more overlap + flip_y=0.1, + class_sep=0.8, random_state=42, ) return X, y