From d0b90a2605f6b3770c52904beee25fdbd71cf9ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 30 Oct 2024 14:50:12 +0100 Subject: [PATCH 1/2] FIX Fix thread-safety issue in RFECV when used with joblib threading backend --- sklearn/feature_selection/_rfe.py | 2 +- sklearn/feature_selection/tests/test_rfe.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 0282facf9fd31..3015a4ae55e94 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -882,7 +882,7 @@ def fit(self, X, y, *, groups=None, **params): func = delayed(_rfe_single_fit) scores_features = parallel( - func(rfe, self.estimator, X, y, train, test, scorer, routed_params) + func(clone(rfe), self.estimator, X, y, train, test, scorer, routed_params) for train, test in cv.split(X, y, **routed_params.splitter.split) ) scores, step_n_features = zip(*scores_features) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 98b55366c5853..fa7650b49202c 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from joblib import parallel_config from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal from sklearn.base import BaseEstimator, ClassifierMixin @@ -703,3 +704,21 @@ def test_rfe_with_sample_weight(): rfe_sw_2.fit(X, y, sample_weight=sample_weight_2) assert not np.array_equal(rfe_sw_2.ranking_, rfe.ranking_) + + +def test_rfe_with_joblib_threading_backend(global_random_seed): + X, y = make_classification(random_state=global_random_seed) + + clf = LogisticRegression() + rfe = RFECV( + estimator=clf, + n_jobs=2, + ) + + rfe.fit(X, y) + ranking_ref = rfe.ranking_ + + with parallel_config(backend="threading"): + rfe.fit(X, y) + + assert_array_equal(ranking_ref, rfe.ranking_) From ac9e685392c64a3a525994ff6356dcca21e2241e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Wed, 30 Oct 2024 16:10:18 +0100 Subject: [PATCH 2/2] Use parallel_backend since parallel_config is not supported for older supported joblib versions --- sklearn/feature_selection/tests/test_rfe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index fa7650b49202c..74c716054cb70 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from joblib import parallel_config +from joblib import parallel_backend from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal from sklearn.base import BaseEstimator, ClassifierMixin @@ -718,7 +718,7 @@ def test_rfe_with_joblib_threading_backend(global_random_seed): rfe.fit(X, y) ranking_ref = rfe.ranking_ - with parallel_config(backend="threading"): + with parallel_backend("threading"): rfe.fit(X, y) assert_array_equal(ranking_ref, rfe.ranking_)