From c15e6e2dfda8dfd9803544a86ea701313cac8ff6 Mon Sep 17 00:00:00 2001
From: Vasco Pereira
Date: Wed, 26 Mar 2025 12:46:27 +0000
Subject: [PATCH 1/3] Fix #30936: ElasticNet l1 ratio fails for float-only
 arrays

Added `_is_l1_ratio_one` function to handle l1_ratio as array, list, or
tuple. Uses `np.isclose` and `np.any` to check if at least one element
is approximately equal to 1.0.
---
 sklearn/feature_selection/_from_model.py |  9 +++-
 .../tests/test_from_model.py             | 41 ++++++++++++++++++-
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index d73b53eea647e..cd69b15a7c09d 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -26,6 +26,11 @@
 )
 from ._base import SelectorMixin, _get_feature_importances
 
+def _is_l1_ratio_one(value):
+    """Helper to safely check if l1_ratio is (or includes) 1.0"""
+    if isinstance(value, (list, tuple, np.ndarray)):
+        return np.any(np.isclose(value, 1.0))
+    return np.isclose(value, 1.0)
 
 def _calculate_threshold(estimator, importances, threshold):
     """Interpret the threshold value"""
@@ -36,8 +41,8 @@ def _calculate_threshold(estimator, importances, threshold):
     is_l1_penalized = hasattr(estimator, "penalty") and estimator.penalty == "l1"
     is_lasso = "Lasso" in est_name
     is_elasticnet_l1_penalized = "ElasticNet" in est_name and (
-        (hasattr(estimator, "l1_ratio_") and np.isclose(estimator.l1_ratio_, 1.0))
-        or (hasattr(estimator, "l1_ratio") and np.isclose(estimator.l1_ratio, 1.0))
+        (hasattr(estimator, "l1_ratio_") and _is_l1_ratio_one(estimator.l1_ratio_)) or
+        (hasattr(estimator, "l1_ratio") and _is_l1_ratio_one(estimator.l1_ratio))
     )
     if is_l1_penalized or is_lasso or is_elasticnet_l1_penalized:
         # the natural default threshold is 0 when l1 penalty was used
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 421f575c92a0e..ee0c5ed733353 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -8,7 +8,7 @@
 from sklearn import datasets
 from sklearn.base import BaseEstimator
 from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression
-from sklearn.datasets import make_friedman1
+from sklearn.datasets import make_friedman1, make_regression
 from sklearn.decomposition import PCA
 from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
 from sklearn.exceptions import NotFittedError
@@ -687,3 +687,42 @@ def test_from_model_estimator_attribute_error():
         from_model.fit(data, y).partial_fit(data)
     assert isinstance(exec_info.value.__cause__, AttributeError)
     assert inner_msg in str(exec_info.value.__cause__)
+
+def test_is_l1_ratio_one():
+    """Test _is_l1_ratio_one helper function"""
+    from sklearn.feature_selection._from_model import _is_l1_ratio_one
+    # Test single values
+    assert _is_l1_ratio_one(1.0) == True
+    assert _is_l1_ratio_one(0.9) == False
+    assert _is_l1_ratio_one(0.0) == False
+    # Test lists
+    assert _is_l1_ratio_one([0.5, 1.0, 0.8]) == True
+    assert _is_l1_ratio_one([0.5, 0.8]) == False
+    # Test numpy arrays
+    assert _is_l1_ratio_one(np.array([0.5, 1.0])) == True
+    assert _is_l1_ratio_one(np.array([0.5, 0.8])) == False
+    # Test empty sequences
+    assert _is_l1_ratio_one([]) == False
+    assert _is_l1_ratio_one(np.array([])) == False
+
+def test_invalid_l1_ratio():
+    """Test that invalid l1_ratio values are handled correctly"""
+    # Values outside [0,1] should work but return False
+    clf = ElasticNetCV(l1_ratio=[0.5, 1.1])
+    model = SelectFromModel(estimator=clf)
+    X, y = make_regression(n_samples=100, n_features=10)
+    model.fit(X, y)
+    # Model should still work, just ignoring invalid l1_ratio
+    assert model.transform(X).shape[1] < X.shape[1]
+
+def test_l1_ratio_edge_cases():
+    """Test edge cases for l1_ratio detection"""
+    from sklearn.feature_selection._from_model import _is_l1_ratio_one
+    # Test values very close to 1
+    assert _is_l1_ratio_one(1.0 + 1e-10) == True
+    assert _is_l1_ratio_one([0.5, 1.0 + 1e-10]) == True
+    # Test mixed valid/invalid values
+    assert _is_l1_ratio_one([0.5, 1.1, 1.0]) == True
+    # Test non-numeric values
+    with pytest.raises(TypeError):
+        _is_l1_ratio_one(['a', 'b'])

From 593224f9c236a0b3af5de6b2c182f279f3970265 Mon Sep 17 00:00:00 2001
From: Vasco Pereira
Date: Mon, 21 Apr 2025 18:49:47 +0100
Subject: [PATCH 2/3] =?UTF-8?q?Fix=20scikit-learn#30936:=20Add=20=20`is=5F?=
 =?UTF-8?q?elasticnetcv=5Fl1=5Fpenalized`=20branch=20so=20that=20ElasticNe?=
 =?UTF-8?q?tCV=20=20works=20with=20a=20list=20of=20floats;=20Add=20non?=
 =?UTF-8?q?=E2=80=91regression=20test=20`test=5Fget=5Ffeature=5Fnames=5Fou?=
 =?UTF-8?q?t=5Felasticnetcv`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 sklearn/feature_selection/_from_model.py | 20 +++----
 .../tests/test_from_model.py             | 54 ++++++-------------
 2 files changed, 26 insertions(+), 48 deletions(-)

diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index cd69b15a7c09d..92654821c9dff 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -26,11 +26,6 @@
 )
 from ._base import SelectorMixin, _get_feature_importances
 
-def _is_l1_ratio_one(value):
-    """Helper to safely check if l1_ratio is (or includes) 1.0"""
-    if isinstance(value, (list, tuple, np.ndarray)):
-        return np.any(np.isclose(value, 1.0))
-    return np.isclose(value, 1.0)
 
 def _calculate_threshold(estimator, importances, threshold):
     """Interpret the threshold value"""
@@ -40,11 +35,18 @@ def _calculate_threshold(estimator, importances, threshold):
     est_name = estimator.__class__.__name__
     is_l1_penalized = hasattr(estimator, "penalty") and estimator.penalty == "l1"
     is_lasso = "Lasso" in est_name
-    is_elasticnet_l1_penalized = "ElasticNet" in est_name and (
-        (hasattr(estimator, "l1_ratio_") and _is_l1_ratio_one(estimator.l1_ratio_)) or
-        (hasattr(estimator, "l1_ratio") and _is_l1_ratio_one(estimator.l1_ratio))
+    is_elasticnet_l1_penalized = est_name == "ElasticNet" and (
+        hasattr(estimator, "l1_ratio") and np.isclose(estimator.l1_ratio, 1.0)
     )
-    if is_l1_penalized or is_lasso or is_elasticnet_l1_penalized:
+    is_elasticnetcv_l1_penalized = est_name == "ElasticNetCV" and (
+        hasattr(estimator, "l1_ratio_") and np.isclose(estimator.l1_ratio_, 1.0)
+    )
+    if (
+        is_l1_penalized
+        or is_lasso
+        or is_elasticnet_l1_penalized
+        or is_elasticnetcv_l1_penalized
+    ):
         # the natural default threshold is 0 when l1 penalty was used
         threshold = 1e-5
     else:
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index ee0c5ed733353..53d33f6ebc898 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -489,6 +489,21 @@ def test_prefit_max_features():
         model.transform(data)
 
 
+def test_get_feature_names_out_elasticnetcv():
+    """Check if ElasticNetCV works with a list of floats.
+
+    Non-regression test for #30936."""
+    X, y = make_regression(n_samples=100, n_features=5, n_informative=3, random_state=0)
+    estimator = ElasticNetCV(l1_ratio=[0.25, 0.5, 0.75], random_state=0)
+    selector = SelectFromModel(estimator=estimator)
+    selector.fit(X, y)
+
+    names_out = selector.get_feature_names_out()
+    mask = selector.get_support()
+    expected = np.array([f"x{i}" for i in range(X.shape[1])])[mask]
+    assert_array_equal(names_out, expected)
+
+
 def test_prefit_get_feature_names_out():
     """Check the interaction between prefit and the feature names."""
     clf = RandomForestClassifier(n_estimators=2, random_state=0)
@@ -687,42 +702,3 @@ def test_from_model_estimator_attribute_error():
         from_model.fit(data, y).partial_fit(data)
     assert isinstance(exec_info.value.__cause__, AttributeError)
     assert inner_msg in str(exec_info.value.__cause__)
-
-def test_is_l1_ratio_one():
-    """Test _is_l1_ratio_one helper function"""
-    from sklearn.feature_selection._from_model import _is_l1_ratio_one
-    # Test single values
-    assert _is_l1_ratio_one(1.0) == True
-    assert _is_l1_ratio_one(0.9) == False
-    assert _is_l1_ratio_one(0.0) == False
-    # Test lists
-    assert _is_l1_ratio_one([0.5, 1.0, 0.8]) == True
-    assert _is_l1_ratio_one([0.5, 0.8]) == False
-    # Test numpy arrays
-    assert _is_l1_ratio_one(np.array([0.5, 1.0])) == True
-    assert _is_l1_ratio_one(np.array([0.5, 0.8])) == False
-    # Test empty sequences
-    assert _is_l1_ratio_one([]) == False
-    assert _is_l1_ratio_one(np.array([])) == False
-
-def test_invalid_l1_ratio():
-    """Test that invalid l1_ratio values are handled correctly"""
-    # Values outside [0,1] should work but return False
-    clf = ElasticNetCV(l1_ratio=[0.5, 1.1])
-    model = SelectFromModel(estimator=clf)
-    X, y = make_regression(n_samples=100, n_features=10)
-    model.fit(X, y)
-    # Model should still work, just ignoring invalid l1_ratio
-    assert model.transform(X).shape[1] < X.shape[1]
-
-def test_l1_ratio_edge_cases():
-    """Test edge cases for l1_ratio detection"""
-    from sklearn.feature_selection._from_model import _is_l1_ratio_one
-    # Test values very close to 1
-    assert _is_l1_ratio_one(1.0 + 1e-10) == True
-    assert _is_l1_ratio_one([0.5, 1.0 + 1e-10]) == True
-    # Test mixed valid/invalid values
-    assert _is_l1_ratio_one([0.5, 1.1, 1.0]) == True
-    # Test non-numeric values
-    with pytest.raises(TypeError):
-        _is_l1_ratio_one(['a', 'b'])

From b0f51a42c6a3fa41dfb55ad5caee3c69985b7a02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?=
Date: Tue, 6 May 2025 14:46:24 +0200
Subject: [PATCH 3/3] add a changelog entry

---
 .../upcoming_changes/sklearn.feature_selection/31107.fix.rst | 4 ++++
 sklearn/feature_selection/tests/test_from_model.py           | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)
 create mode 100644 doc/whats_new/upcoming_changes/sklearn.feature_selection/31107.fix.rst

diff --git a/doc/whats_new/upcoming_changes/sklearn.feature_selection/31107.fix.rst b/doc/whats_new/upcoming_changes/sklearn.feature_selection/31107.fix.rst
new file mode 100644
index 0000000000000..b5ca4ab283434
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/sklearn.feature_selection/31107.fix.rst
@@ -0,0 +1,4 @@
+- :class:`feature_selection.SelectFromModel` now correctly works when the estimator
+  is an instance of :class:`linear_model.ElasticNetCV` with its `l1_ratio` parameter
+  being an array-like.
+  By :user:`Vasco Pereira `.
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 53d33f6ebc898..17bedf44748fb 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -493,7 +493,7 @@ def test_get_feature_names_out_elasticnetcv():
     """Check if ElasticNetCV works with a list of floats.
 
     Non-regression test for #30936."""
-    X, y = make_regression(n_samples=100, n_features=5, n_informative=3, random_state=0)
+    X, y = make_regression(n_features=5, n_informative=3, random_state=0)
     estimator = ElasticNetCV(l1_ratio=[0.25, 0.5, 0.75], random_state=0)
     selector = SelectFromModel(estimator=estimator)
     selector.fit(X, y)
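
For context, a minimal usage sketch (not part of the patch series itself) of the scenario the non-regression test above exercises: SelectFromModel wrapping an ElasticNetCV whose l1_ratio is a list of floats. It only uses public scikit-learn APIs; the feature names such as "x0" are the defaults produced when the input has no column names.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import ElasticNetCV

X, y = make_regression(n_features=5, n_informative=3, random_state=0)

# l1_ratio is a list, so ElasticNetCV cross-validates over several ratios.
# Before the fix, the default-threshold heuristic compared the list-valued
# l1_ratio attribute to 1.0 with np.isclose, which yields an array rather
# than a single boolean; after the fix only the fitted scalar l1_ratio_
# (ElasticNetCV) or the scalar l1_ratio (ElasticNet) is inspected.
selector = SelectFromModel(ElasticNetCV(l1_ratio=[0.25, 0.5, 0.75], random_state=0))
selector.fit(X, y)

print(selector.get_support())            # boolean mask over the 5 input features
print(selector.get_feature_names_out())  # subset of ['x0', ..., 'x4']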