From f6ff0edcb0a478de5f667d44344ccbc74b0b48e8 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Tue, 17 Jun 2025 20:06:55 +0500
Subject: [PATCH 01/24] ENH Add array api support for PolynomialFeatures

---
 sklearn/preprocessing/_polynomial.py          | 37 ++++++++---
 .../preprocessing/tests/test_polynomial.py    | 66 ++++++++++++++++++-
 sklearn/utils/_array_api.py                   | 16 +++--
 3 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 69bfe7b212bba..61ed43a28ae6e 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -15,6 +15,13 @@
 from scipy.interpolate import BSpline
 from scipy.special import comb
 
+from sklearn.utils._array_api import (
+    _is_numpy_namespace,
+    _modify_in_place_if_numpy,
+    get_namespace_and_device,
+    supported_float_dtypes,
+)
+
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils import check_array
 from ..utils._param_validation import Interval, StrOptions
@@ -415,18 +422,18 @@ def transform(self, X):
             `csr_matrix`.
         """
         check_is_fitted(self)
-
+        xp, _, device_ = get_namespace_and_device(X)
         X = validate_data(
             self,
             X,
             order="F",
-            dtype=FLOAT_DTYPES,
+            dtype=supported_float_dtypes(xp=xp, device=device_),
             reset=False,
             accept_sparse=("csr", "csc"),
         )
 
         n_samples, n_features = X.shape
-        max_int32 = np.iinfo(np.int32).max
+        max_int32 = xp.iinfo(xp.int32).max
         if sparse.issparse(X) and X.format == "csr":
             if self._max_degree > 3:
                 return self.transform(X.tocsc()).tocsr()
@@ -496,8 +503,14 @@ def transform(self, X):
         else:
             # Do as if _min_degree = 0 and cut down array after the
             # computation, i.e. use _n_out_full instead of n_output_features_.
-            XP = np.empty(
-                shape=(n_samples, self._n_out_full), dtype=X.dtype, order=self.order
+            order_kwargs = {}
+            if _is_numpy_namespace(xp=xp):
+                order_kwargs["order"] = self.order
+            XP = xp.empty(
+                shape=(n_samples, self._n_out_full),
+                dtype=X.dtype,
+                device=device_,
+                **order_kwargs,
             )
 
             # What follows is a faster implementation of:
@@ -543,11 +556,12 @@ def transform(self, X):
                         break
                     # XP[:, start:end] are terms of degree d - 1
                     # that exclude feature #feature_idx.
-                    np.multiply(
+                    XP[:, current_col:next_col] = _modify_in_place_if_numpy(
+                        xp,
+                        xp.multiply,
                         XP[:, start:end],
                         X[:, feature_idx : feature_idx + 1],
                         out=XP[:, current_col:next_col],
-                        casting="no",
                     )
                     current_col = next_col
 
@@ -557,13 +571,16 @@ def transform(self, X):
             if self._min_degree > 1:
                 n_XP, n_Xout = self._n_out_full, self.n_output_features_
                 if self.include_bias:
-                    Xout = np.empty(
-                        shape=(n_samples, n_Xout), dtype=XP.dtype, order=self.order
+                    Xout = xp.empty(
+                        shape=(n_samples, n_Xout),
+                        dtype=XP.dtype,
+                        device=device_,
+                        **order_kwargs,
                     )
                     Xout[:, 0] = 1
                     Xout[:, 1:] = XP[:, n_XP - n_Xout + 1 :]
                 else:
-                    Xout = XP[:, n_XP - n_Xout :].copy()
+                    Xout = xp.asarray(XP[:, n_XP - n_Xout :], copy=True)
                 XP = Xout
         return XP
 
diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 640bf5705baad..8c76b2978c581 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -7,6 +7,7 @@
 from scipy.interpolate import BSpline
 from scipy.sparse import random as sparse_random
 
+from sklearn._config import config_context
 from sklearn.linear_model import LinearRegression
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import (
@@ -17,7 +18,13 @@
 from sklearn.preprocessing._csr_polynomial_expansion import (
     _get_sizeof_LARGEST_INT_t,
 )
-from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._array_api import (
+    _convert_to_numpy,
+    _get_namespace_device_dtype_ids,
+    get_namespace,
+    yield_namespace_device_dtype_combinations,
+)
+from sklearn.utils._testing import _array_api_for_tests, assert_array_almost_equal
 from sklearn.utils.fixes import (
     CSC_CONTAINERS,
     CSR_CONTAINERS,
@@ -1228,3 +1235,60 @@ def test_csr_polynomial_expansion_windows_fail(csr_container):
         X_trans = pf.fit_transform(X)
         for idx in range(3):
             assert X_trans[0, expected_indices[idx]] == pytest.approx(1.0)
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name",
+    yield_namespace_device_dtype_combinations(),
+    ids=_get_namespace_device_dtype_ids,
+)
+@pytest.mark.parametrize(
+    "degree, include_bias, interaction_only",
+    [
+        (2, True, False),
+        (2, False, False),
+        (2, True, True),
+        (2, False, True),
+        ((2, 2), True, False),
+        ((2, 2), False, False),
+        ((2, 2), True, True),
+        ((2, 2), False, True),
+        (3, True, False),
+        (3, False, False),
+        (3, True, True),
+        (3, False, True),
+        ((2, 3), True, False),
+        ((2, 3), False, False),
+        ((2, 3), True, True),
+        ((2, 3), False, True),
+        ((3, 3), True, False),
+        ((3, 3), False, False),
+        ((3, 3), True, True),
+    ],
+)
+def test_polynomial_features_array_api_compliance(
+    two_features_degree3,
+    degree,
+    include_bias,
+    interaction_only,
+    array_namespace,
+    device,
+    dtype_name,
+):
+    """Test PolynomialFeatures on 2 features up to degree 3."""
+    xp = _array_api_for_tests(array_namespace, device)
+    X, _ = two_features_degree3
+    X_np = X.astype(dtype_name)
+    X_xp = xp.asarray(X, device=device)
+    with config_context(array_api_dispatch=True):
+        tf_np = PolynomialFeatures(
+            degree=degree, include_bias=include_bias, interaction_only=interaction_only
+        ).fit(X_np)
+
+        tf_xp = PolynomialFeatures(
+            degree=degree, include_bias=include_bias, interaction_only=interaction_only
+        ).fit(X_xp)
+        out_np = tf_np.transform(X_np)
+        out_xp = tf_xp.transform(X_xp)
+        assert_allclose(_convert_to_numpy(out_xp, xp=xp), out_np)
+        assert get_namespace(out_xp)[0].__name__ == xp.__name__
diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index b00173867f554..baba46153432a 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -290,7 +290,7 @@ def _isdtype_single(dtype, kind, *, xp):
         return dtype == kind
 
 
-def supported_float_dtypes(xp):
+def supported_float_dtypes(xp, device=None):
     """Supported floating point types for the namespace.
 
     Note: float16 is not officially part of the Array API spec at the
@@ -299,10 +299,18 @@ def supported_float_dtypes(xp):
 
     https://data-apis.org/array-api/latest/API_specification/data_types.html
     """
-    if hasattr(xp, "float16"):
-        return (xp.float64, xp.float32, xp.float16)
+    if (
+        array_api_compat.is_torch_namespace(xp)
+        and getattr(device, "type", device) == "mps"
+    ):
+        dtypes = (xp.float32,)
     else:
-        return (xp.float64, xp.float32)
+        dtypes = (xp.float64, xp.float32)
+
+    if hasattr(xp, "float16"):
+        return (*dtypes, xp.float16)
+
+    return dtypes
 
 
 def ensure_common_namespace_device(reference, *arrays):

From 63111d4b41dc81dd881d70eb91d38e5560550978 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 18 Jun 2025 11:15:52 +0500
Subject: [PATCH 02/24] Add benchmark

---
 benchmark.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 benchmark.py

diff --git a/benchmark.py b/benchmark.py
new file mode 100644
index 0000000000000..fe931db718629
--- /dev/null
+++ b/benchmark.py
@@ -0,0 +1,52 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+from time import time
+
+import numpy as np
+import torch as xp
+from tqdm import tqdm
+
+from sklearn._config import config_context
+from sklearn.preprocessing._polynomial import PolynomialFeatures
+
+X_np = np.random.rand(100000, 10)
+X_xp_cuda = xp.asarray(X_np, device="cuda")
+
+# Numpy benchmarks
+fit_times = []
+transform_times = []
+for _ in tqdm(range(10), desc="Numpy Flow"):
+    start = time()
+    pf_np = PolynomialFeatures(degree=2)
+    pf_np.fit(X_np)
+    fit_times.append(time() - start)
+
+    start = time()
+    pf_np.transform(X_np)
+    transform_times.append(time() - start)
+
+avg_fit_time = sum(fit_times) / 10
+avg_transform_time = sum(transform_times) / 10
+print(f"Avg fit time for numpy: {avg_fit_time}")
+print(f"Avg transform time for numpy: {avg_transform_time}")
+
+
+# Torch cuda benchmarks
+fit_times = []
+transform_times = []
+for _ in tqdm(range(10), desc="Torch cuda Flow"):
+    with config_context(array_api_dispatch=True):
+        start = time()
+        pf_xp = PolynomialFeatures(degree=2)
+        pf_xp.fit(X_xp_cuda)
+        fit_times.append(time() - start)
+
+        start = time()
+        pf_xp.transform(X_xp_cuda)
+        transform_times.append(time() - start)
+
+avg_fit_time = sum(fit_times) / 10
+avg_transform_time = sum(transform_times) / 10
+print(f"Avg fit time for torch cuda: {avg_fit_time}")
+print(f"Avg transform time for torch cuda: {avg_transform_time}")

From e9e1b1669e73546a59d7828e671126159c524421 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 18 Jun 2025 11:19:29 +0500
Subject: [PATCH 03/24] Add benchmark

---
 benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmark.py b/benchmark.py
index fe931db718629..1f24f78e488d1 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -10,7 +10,7 @@
 from sklearn._config import config_context
 from sklearn.preprocessing._polynomial import PolynomialFeatures
 
-X_np = np.random.rand(100000, 10)
+X_np = np.random.rand(100000, 100)
 X_xp_cuda = xp.asarray(X_np, device="cuda")
 
 # Numpy benchmarks

From 17fa68a65d838f945a512ded55b5cae565009954 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 18 Jun 2025 11:46:15 +0500
Subject: [PATCH 04/24] Remove benchmark file for testing

---
 benchmark.py | 52 ----------------------------------------------------
 1 file changed, 52 deletions(-)
 delete mode 100644 benchmark.py

diff --git a/benchmark.py b/benchmark.py
deleted file mode 100644
index 1f24f78e488d1..0000000000000
--- a/benchmark.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Authors: The scikit-learn developers
-# SPDX-License-Identifier: BSD-3-Clause
-
-from time import time
-
-import numpy as np
-import torch as xp
-from tqdm import tqdm
-
-from sklearn._config import config_context
-from sklearn.preprocessing._polynomial import PolynomialFeatures
-
-X_np = np.random.rand(100000, 100)
-X_xp_cuda = xp.asarray(X_np, device="cuda")
-
-# Numpy benchmarks
-fit_times = []
-transform_times = []
-for _ in tqdm(range(10), desc="Numpy Flow"):
-    start = time()
-    pf_np = PolynomialFeatures(degree=2)
-    pf_np.fit(X_np)
-    fit_times.append(time() - start)
-
-    start = time()
-    pf_np.transform(X_np)
-    transform_times.append(time() - start)
-
-avg_fit_time = sum(fit_times) / 10
-avg_transform_time = sum(transform_times) / 10
-print(f"Avg fit time for numpy: {avg_fit_time}")
-print(f"Avg transform time for numpy: {avg_transform_time}")
-
-
-# Torch cuda benchmarks
-fit_times = []
-transform_times = []
-for _ in tqdm(range(10), desc="Torch cuda Flow"):
-    with config_context(array_api_dispatch=True):
-        start = time()
-        pf_xp = PolynomialFeatures(degree=2)
-        pf_xp.fit(X_xp_cuda)
-        fit_times.append(time() - start)
-
-        start = time()
-        pf_xp.transform(X_xp_cuda)
-        transform_times.append(time() - start)
-
-avg_fit_time = sum(fit_times) / 10
-avg_transform_time = sum(transform_times) / 10
-print(f"Avg fit time for torch cuda: {avg_fit_time}")
-print(f"Avg transform time for torch cuda: {avg_transform_time}")

From a72f6e94f4e2993a612c41e79d44da0af7bd8119 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 18 Jun 2025 11:51:28 +0500
Subject: [PATCH 05/24] Add benchmark again for testing

---
 benchmark.py                                  | 52 +++++++++++++++++++
 .../preprocessing/tests/test_polynomial.py    |  2 +-
 2 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 benchmark.py

diff --git a/benchmark.py b/benchmark.py
new file mode 100644
index 0000000000000..1f24f78e488d1
--- /dev/null
+++ b/benchmark.py
@@ -0,0 +1,52 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+from time import time
+
+import numpy as np
+import torch as xp
+from tqdm import tqdm
+
+from sklearn._config import config_context
+from sklearn.preprocessing._polynomial import PolynomialFeatures
+
+X_np = np.random.rand(100000, 100)
+X_xp_cuda = xp.asarray(X_np, device="cuda")
+
+# Numpy benchmarks
+fit_times = []
+transform_times = []
+for _ in tqdm(range(10), desc="Numpy Flow"):
+    start = time()
+    pf_np = PolynomialFeatures(degree=2)
+    pf_np.fit(X_np)
+    fit_times.append(time() - start)
+
+    start = time()
+    pf_np.transform(X_np)
+    transform_times.append(time() - start)
+
+avg_fit_time = sum(fit_times) / 10
+avg_transform_time = sum(transform_times) / 10
+print(f"Avg fit time for numpy: {avg_fit_time}")
+print(f"Avg transform time for numpy: {avg_transform_time}")
+
+
+# Torch cuda benchmarks
+fit_times = []
+transform_times = []
+for _ in tqdm(range(10), desc="Torch cuda Flow"):
+    with config_context(array_api_dispatch=True):
+        start = time()
+        pf_xp = PolynomialFeatures(degree=2)
+        pf_xp.fit(X_xp_cuda)
+        fit_times.append(time() - start)
+
+        start = time()
+        pf_xp.transform(X_xp_cuda)
+        transform_times.append(time() - start)
+
+avg_fit_time = sum(fit_times) / 10
+avg_transform_time = sum(transform_times) / 10
+print(f"Avg fit time for torch cuda: {avg_fit_time}")
+print(f"Avg transform time for torch cuda: {avg_transform_time}")
diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 8c76b2978c581..7375240695c1e 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1275,7 +1275,7 @@ def test_polynomial_features_array_api_compliance(
     device,
     dtype_name,
 ):
-    """Test PolynomialFeatures on 2 features up to degree 3."""
+    """Test array API compliance for PolynomialFeatures on 2 features up to degree 3."""
     xp = _array_api_for_tests(array_namespace, device)
     X, _ = two_features_degree3
     X_np = X.astype(dtype_name)

From c58ad4633323f0581099e88e460de47002c874f8 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 18 Jun 2025 11:53:03 +0500
Subject: [PATCH 06/24] Remove benchmark file for testing

---
 benchmark.py | 52 ----------------------------------------------------
 1 file changed, 52 deletions(-)
 delete mode 100644 benchmark.py

diff --git a/benchmark.py b/benchmark.py
deleted file mode 100644
index 1f24f78e488d1..0000000000000
--- a/benchmark.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Authors: The scikit-learn developers
-# SPDX-License-Identifier: BSD-3-Clause
-
-from time import time
-
-import numpy as np
-import torch as xp
-from tqdm import tqdm
-
-from sklearn._config import config_context
-from sklearn.preprocessing._polynomial import PolynomialFeatures
-
-X_np = np.random.rand(100000, 100)
-X_xp_cuda = xp.asarray(X_np, device="cuda")
-
-# Numpy benchmarks
-fit_times = []
-transform_times = []
-for _ in tqdm(range(10), desc="Numpy Flow"):
-    start = time()
-    pf_np = PolynomialFeatures(degree=2)
-    pf_np.fit(X_np)
-    fit_times.append(time() - start)
-
-    start = time()
-    pf_np.transform(X_np)
-    transform_times.append(time() - start)
-
-avg_fit_time = sum(fit_times) / 10
-avg_transform_time = sum(transform_times) / 10
-print(f"Avg fit time for numpy: {avg_fit_time}")
-print(f"Avg transform time for numpy: {avg_transform_time}")
-
-
-# Torch cuda benchmarks
-fit_times = []
-transform_times = []
-for _ in tqdm(range(10), desc="Torch cuda Flow"):
-    with config_context(array_api_dispatch=True):
-        start = time()
-        pf_xp = PolynomialFeatures(degree=2)
-        pf_xp.fit(X_xp_cuda)
-        fit_times.append(time() - start)
-
-        start = time()
-        pf_xp.transform(X_xp_cuda)
-        transform_times.append(time() - start)
-
-avg_fit_time = sum(fit_times) / 10
-avg_transform_time = sum(transform_times) / 10
-print(f"Avg fit time for torch cuda: {avg_fit_time}")
-print(f"Avg transform time for torch cuda: {avg_transform_time}")

From 1518126b1a97e883beadf56eb96193255ec7a611 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 18 Jun 2025 11:54:55 +0500
Subject: [PATCH 07/24] Add in documentation

---
 doc/modules/array_api.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index 6139c8e8b2863..1b8f1a16a7ee7 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -117,6 +117,7 @@ Estimators
 - :class:`preprocessing.MaxAbsScaler`
 - :class:`preprocessing.MinMaxScaler`
 - :class:`preprocessing.Normalizer`
+- :class:`preprocessing.PolynomialFeatures`
 
 Meta-estimators
 ---------------

From 2e965849b05e1aece9e3fe2c0098b719d41e9c10 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Wed, 18 Jun 2025 12:04:43 +0500
Subject: [PATCH 08/24] Add changelog

---
 doc/whats_new/upcoming_changes/array-api/31580.feature.rst | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 doc/whats_new/upcoming_changes/array-api/31580.feature.rst

diff --git a/doc/whats_new/upcoming_changes/array-api/31580.feature.rst b/doc/whats_new/upcoming_changes/array-api/31580.feature.rst
new file mode 100644
index 0000000000000..3d7aaa4372109
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/array-api/31580.feature.rst
@@ -0,0 +1,2 @@
+- :class:`preprocessing.PolynomialFeatures` now supports array API compatible inputs.
+  By :user:`Omar Salman <OmarManzoor>`

From a9055f54c1f5ed66d22b06a54fbe2528d21a8601 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 19 Jun 2025 10:59:52 +0500
Subject: [PATCH 09/24] Refactor code in supported_float_dtypes

---
 sklearn/utils/_array_api.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index baba46153432a..62983bc51e9d7 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -299,14 +299,12 @@ def supported_float_dtypes(xp, device=None):
 
     https://data-apis.org/array-api/latest/API_specification/data_types.html
     """
-    if (
-        array_api_compat.is_torch_namespace(xp)
-        and getattr(device, "type", device) == "mps"
-    ):
-        dtypes = (xp.float32,)
-    else:
-        dtypes = (xp.float64, xp.float32)
-
+    dtypes_dict = xp.__array_namespace_info__().dtypes(
+        kind="real floating", device=device
+    )
+    # The returned dict contains the dtypes in the order float32, float64. Thus
+    # we reverse this ordering to ensure that the highest precision comes first.
+    dtypes = tuple(dtypes_dict.values())[::-1]
     if hasattr(xp, "float16"):
         return (*dtypes, xp.float16)
 

From 7aaa83b27de76017c1e84f186e5142501869ab8b Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 19 Jun 2025 20:44:35 +0500
Subject: [PATCH 10/24] Update the supported float dtypes function

---
 sklearn/utils/_array_api.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py
index 53ec0d168770c..459907cdb9bbf 100644
--- a/sklearn/utils/_array_api.py
+++ b/sklearn/utils/_array_api.py
@@ -302,13 +302,15 @@ def supported_float_dtypes(xp, device=None):
     dtypes_dict = xp.__array_namespace_info__().dtypes(
         kind="real floating", device=device
     )
-    # The returned dict contains the dtypes in the order float32, float64. Thus
-    # we reverse this ordering to ensure that the highest precision comes first.
-    dtypes = tuple(dtypes_dict.values())[::-1]
+    valid_float_dtypes = []
+    for dtype_key in ("float64", "float32"):
+        if dtype_key in dtypes_dict:
+            valid_float_dtypes.append(dtypes_dict[dtype_key])
+
     if hasattr(xp, "float16"):
-        return (*dtypes, xp.float16)
+        valid_float_dtypes.append(xp.float16)
 
-    return dtypes
+    return tuple(valid_float_dtypes)
 
 
 def ensure_common_namespace_device(reference, *arrays):

From 6620df5744765c2acd705ed5b04c2141819b4d74 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 20 Jun 2025 19:55:44 +0500
Subject: [PATCH 11/24] Add device check in test

---
 sklearn/preprocessing/tests/test_polynomial.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 7375240695c1e..4169a53966b0e 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -21,6 +21,7 @@
 from sklearn.utils._array_api import (
     _convert_to_numpy,
     _get_namespace_device_dtype_ids,
+    device,
     get_namespace,
     yield_namespace_device_dtype_combinations,
 )
@@ -1238,7 +1239,7 @@ def test_csr_polynomial_expansion_windows_fail(csr_container):
 
 
 @pytest.mark.parametrize(
-    "array_namespace, device, dtype_name",
+    "array_namespace, device_, dtype_name",
     yield_namespace_device_dtype_combinations(),
     ids=_get_namespace_device_dtype_ids,
 )
@@ -1272,14 +1273,14 @@ def test_polynomial_features_array_api_compliance(
     include_bias,
     interaction_only,
     array_namespace,
-    device,
+    device_,
     dtype_name,
 ):
     """Test array API compliance for PolynomialFeatures on 2 features up to degree 3."""
-    xp = _array_api_for_tests(array_namespace, device)
+    xp = _array_api_for_tests(array_namespace, device_)
     X, _ = two_features_degree3
     X_np = X.astype(dtype_name)
-    X_xp = xp.asarray(X, device=device)
+    X_xp = xp.asarray(X, device=device_)
     with config_context(array_api_dispatch=True):
         tf_np = PolynomialFeatures(
             degree=degree, include_bias=include_bias, interaction_only=interaction_only
@@ -1292,3 +1293,4 @@ def test_polynomial_features_array_api_compliance(
         out_xp = tf_xp.transform(X_xp)
         assert_allclose(_convert_to_numpy(out_xp, xp=xp), out_np)
         assert get_namespace(out_xp)[0].__name__ == xp.__name__
+        assert device(out_xp) == device(X_xp)

From 596e10c8a66bd7bd7d97ffb408b3b5bfb03009a8 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Mon, 23 Jun 2025 12:32:59 +0500
Subject: [PATCH 12/24] Add array api tag

---
 sklearn/preprocessing/_polynomial.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 61ed43a28ae6e..09ffe1ac47d9d 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -587,6 +587,7 @@ def transform(self, X):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.sparse = True
+        tags.array_api_support = True
         return tags
 
 

From d171ea15e05da884bbf7618e665321e186e92846 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 26 Jun 2025 13:53:53 +0500
Subject: [PATCH 13/24] Address PR suggestions

---
 sklearn/preprocessing/_polynomial.py          | 24 ++++++++++++-------
 .../preprocessing/tests/test_polynomial.py    | 22 +++++++++++++++++
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 09ffe1ac47d9d..d523328956e94 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -17,7 +17,6 @@
 
 from sklearn.utils._array_api import (
     _is_numpy_namespace,
-    _modify_in_place_if_numpy,
     get_namespace_and_device,
     supported_float_dtypes,
 )
@@ -506,6 +505,11 @@ def transform(self, X):
             order_kwargs = {}
             if _is_numpy_namespace(xp=xp):
                 order_kwargs["order"] = self.order
+            elif self.order == "F":
+                raise AttributeError(
+                    "PolynomialFeatures does not support order=F for the array API"
+                )
+
             XP = xp.empty(
                 shape=(n_samples, self._n_out_full),
                 dtype=X.dtype,
@@ -556,13 +560,17 @@ def transform(self, X):
                         break
                     # XP[:, start:end] are terms of degree d - 1
                     # that exclude feature #feature_idx.
-                    XP[:, current_col:next_col] = _modify_in_place_if_numpy(
-                        xp,
-                        xp.multiply,
-                        XP[:, start:end],
-                        X[:, feature_idx : feature_idx + 1],
-                        out=XP[:, current_col:next_col],
-                    )
+                    if _is_numpy_namespace(xp):
+                        np.multiply(
+                            XP[:, start:end],
+                            X[:, feature_idx : feature_idx + 1],
+                            out=XP[:, current_col:next_col],
+                            casting="no",
+                        )
+                    else:
+                        XP[:, current_col:next_col] = xp.multiply(
+                            XP[:, start:end], X[:, feature_idx : feature_idx + 1]
+                        )
                     current_col = next_col
 
                 new_index.append(current_col)
diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 4169a53966b0e..2e545845c3370 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -21,6 +21,7 @@
 from sklearn.utils._array_api import (
     _convert_to_numpy,
     _get_namespace_device_dtype_ids,
+    _is_numpy_namespace,
     device,
     get_namespace,
     yield_namespace_device_dtype_combinations,
@@ -1294,3 +1295,24 @@ def test_polynomial_features_array_api_compliance(
         assert_allclose(_convert_to_numpy(out_xp, xp=xp), out_np)
         assert get_namespace(out_xp)[0].__name__ == xp.__name__
         assert device(out_xp) == device(X_xp)
+
+
+@pytest.mark.parametrize(
+    "array_namespace, device_, dtype_name",
+    yield_namespace_device_dtype_combinations(),
+    ids=_get_namespace_device_dtype_ids,
+)
+def test_polynomial_features_array_api_raises_on_order_F(
+    array_namespace, device_, dtype_name
+):
+    xp = _array_api_for_tests(array_namespace, device_)
+    X = np.arange(6).reshape((3, 2)).astype(dtype_name)
+    X_xp = xp.asarray(X, device=device_)
+    msg = "PolynomialFeatures does not support order=F for the array API"
+    with config_context(array_api_dispatch=True):
+        pf = PolynomialFeatures(order="F").fit(X_xp)
+        if _is_numpy_namespace(xp):
+            pf.transform(X_xp)
+        else:
+            with pytest.raises(AttributeError, match=msg):
+                pf.transform(X_xp)

From e9fdca1463d29e0c272b1d5742acc5d7a454aaa5 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 26 Jun 2025 14:13:22 +0500
Subject: [PATCH 14/24] Minor updates

---
 sklearn/preprocessing/tests/test_polynomial.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 2e545845c3370..8b00aa6eb51d7 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1308,10 +1308,10 @@ def test_polynomial_features_array_api_raises_on_order_F(
     xp = _array_api_for_tests(array_namespace, device_)
     X = np.arange(6).reshape((3, 2)).astype(dtype_name)
     X_xp = xp.asarray(X, device=device_)
-    msg = "PolynomialFeatures does not support order=F for the array API"
+    msg = "PolynomialFeatures does not support order='F' for the array API"
     with config_context(array_api_dispatch=True):
         pf = PolynomialFeatures(order="F").fit(X_xp)
-        if _is_numpy_namespace(xp):
+        if _is_numpy_namespace(xp):  # Numpy should not raise
             pf.transform(X_xp)
         else:
             with pytest.raises(AttributeError, match=msg):

From 66084a763aa1d8e6225baa4624910105770f7936 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 26 Jun 2025 14:18:45 +0500
Subject: [PATCH 15/24] Minor fix

---
 sklearn/preprocessing/_polynomial.py | 265 +++++++--------------------
 1 file changed, 65 insertions(+), 200 deletions(-)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index c1442f169d431..a830b9c3428b0 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -23,7 +23,6 @@
 
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils import check_array
-from ..utils._mask import _get_mask
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.fixes import parse_version, sp_version
 from ..utils.stats import _weighted_percentile
@@ -508,7 +507,7 @@ def transform(self, X):
                 order_kwargs["order"] = self.order
             elif self.order == "F":
                 raise AttributeError(
-                    "PolynomialFeatures does not support order=F for the array API"
+                    "PolynomialFeatures does not support order='F' for the array API"
                 )
 
             XP = xp.empty(
@@ -665,20 +664,6 @@ class SplineTransformer(TransformerMixin, BaseEstimator):
         Order of output array in the dense case. `'F'` order is faster to compute, but
         may slow down subsequent estimators.
 
-    handle_missing : {'error', 'zeros'}, default='error'
-        Specifies the way missing values are handled.
-
-        - 'error' : Raise an error if `np.nan` values are present during :meth:`fit`.
-        - 'zeros' : Encode splines of missing values with values `0`.
-
-        Note that `handle_missing='zeros'` differs from first imputing missing values
-        with zeros and then creating the spline basis. The latter creates spline basis
-        functions which have non-zero values at the missing values
-        whereas this option simply sets all spline basis function values to zero at the
-        missing values.
-
-        .. versionadded:: 1.8
-
     sparse_output : bool, default=False
         Will return sparse CSR matrix if set True else will return an array.
 
@@ -745,7 +730,6 @@ class SplineTransformer(TransformerMixin, BaseEstimator):
         ],
         "include_bias": ["boolean"],
         "order": [StrOptions({"C", "F"})],
-        "handle_missing": [StrOptions({"error", "zeros"})],
         "sparse_output": ["boolean"],
     }
 
@@ -758,7 +742,6 @@ def __init__(
         extrapolation="constant",
         include_bias=True,
         order="C",
-        handle_missing="error",
         sparse_output=False,
     ):
         self.n_knots = n_knots
@@ -767,12 +750,11 @@ def __init__(
         self.extrapolation = extrapolation
         self.include_bias = include_bias
         self.order = order
-        self.handle_missing = handle_missing
         self.sparse_output = sparse_output
 
     @staticmethod
     def _get_base_knot_positions(X, n_knots=10, knots="uniform", sample_weight=None):
-        """Calculate base knot positions for `knots` either "uniform" or "quantile".
+        """Calculate base knot positions.
 
         Base knots such that first knot <= feature <= last knot. For the
         B-spline construction with scipy.interpolate.BSpline, 2*degree knots
@@ -789,7 +771,7 @@ def _get_base_knot_positions(X, n_knots=10, knots="uniform", sample_weight=None)
             )
 
             if sample_weight is None:
-                knots = np.nanpercentile(X, percentile_ranks, axis=0)
+                knots = np.percentile(X, percentile_ranks, axis=0)
             else:
                 knots = np.array(
                     [
@@ -804,15 +786,8 @@ def _get_base_knot_positions(X, n_knots=10, knots="uniform", sample_weight=None)
             # `else` is therefore safe.
             # Disregard observations with zero weight.
             mask = slice(None, None, 1) if sample_weight is None else sample_weight > 0
-            x_min = np.zeros(X.shape[1], dtype=np.float64)
-            x_max = np.zeros(X.shape[1], dtype=np.float64)
-            for feature_idx in range(X.shape[1]):
-                x = X[mask, feature_idx]
-                if np.all(np.isnan(x)):
-                    continue
-                else:
-                    x_min[feature_idx] = np.nanmin(x)
-                    x_max[feature_idx] = np.nanmax(x)
+            x_min = np.amin(X[mask], axis=0)
+            x_max = np.amax(X[mask], axis=0)
 
             knots = np.linspace(
                 start=x_min,
@@ -876,26 +851,14 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted transformer.
         """
-        try:
-            X = validate_data(
-                self,
-                X,
-                reset=True,
-                accept_sparse=False,
-                ensure_min_samples=2,
-                ensure_2d=True,
-                ensure_all_finite=(self.handle_missing != "zeros"),
-            )
-        except ValueError as e:
-            if "Input X contains NaN." in str(e) and self.handle_missing == "error":
-                raise ValueError(
-                    "Input X contains NaN values and `SplineTransformer` is configured "
-                    "to error in this case (handle_missing='error'). To avoid this "
-                    "error, set handle_missing='zeros' to encode missing values as "
-                    "splines with value 0 or ensure no missing values in X."
-                ) from e
-            raise e
-
+        X = validate_data(
+            self,
+            X,
+            reset=True,
+            accept_sparse=False,
+            ensure_min_samples=2,
+            ensure_2d=True,
+        )
         if sample_weight is not None:
             sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
@@ -903,10 +866,7 @@ def fit(self, X, y=None, sample_weight=None):
 
         if isinstance(self.knots, str):
             base_knots = self._get_base_knot_positions(
-                X,
-                n_knots=self.n_knots,
-                knots=self.knots,
-                sample_weight=sample_weight,
+                X, n_knots=self.n_knots, knots=self.knots, sample_weight=sample_weight
             )
         else:
             base_knots = check_array(self.knots, dtype=np.float64)
@@ -1014,21 +974,14 @@ def transform(self, X):
         """
         check_is_fitted(self)
 
-        X = validate_data(
-            self,
-            X,
-            reset=False,
-            accept_sparse=False,
-            ensure_2d=True,
-            ensure_all_finite=(self.handle_missing != "zeros"),
-        )
+        X = validate_data(self, X, reset=False, accept_sparse=False, ensure_2d=True)
 
         n_samples, n_features = X.shape
         n_splines = self.bsplines_[0].c.shape[1]
         degree = self.degree
 
         # TODO: Remove this condition, once scipy 1.10 is the minimum version.
-        #       Only scipy >= 1.10 supports design_matrix(.., extrapolate=..).
+        #       Only scipy => 1.10 supports design_matrix(.., extrapolate=..).
         #       The default (implicit in scipy < 1.10) is extrapolate=False.
         scipy_1_10 = sp_version >= parse_version("1.10.0")
         # Note: self.bsplines_[0].extrapolate is True for extrapolation in
@@ -1052,10 +1005,8 @@ def transform(self, X):
         else:
             XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)
 
-        for feature_idx in range(n_features):
-            spl = self.bsplines_[feature_idx]
-            # Get indicator for nan values in the current column.
-            nan_row_indices = np.flatnonzero(_get_mask(X[:, feature_idx], np.nan))
+        for i in range(n_features):
+            spl = self.bsplines_[i]
 
             if self.extrapolation in ("continue", "error", "periodic"):
                 if self.extrapolation == "periodic":
@@ -1064,44 +1015,17 @@ def transform(self, X):
                     # This is equivalent to BSpline(.., extrapolate="periodic")
                     # for scipy>=1.0.0.
                     n = spl.t.size - spl.k - 1
-                    if spl.t[n] - spl.t[spl.k] > 0:
-                        # Assign to new array to avoid inplace operation
-                        x = spl.t[spl.k] + (X[:, feature_idx] - spl.t[spl.k]) % (
-                            spl.t[n] - spl.t[spl.k]
-                        )
-                    else:
-                        # This can happen if the column has a single non-nan
-                        # value. Treat as a constant feature.
-                        x = np.zeros_like(X[:, feature_idx])
-                else:  # self.extrapolation in ("continue", "error")
-                    x = X[:, feature_idx]
+                    # Assign to new array to avoid inplace operation
+                    x = spl.t[spl.k] + (X[:, i] - spl.t[spl.k]) % (
+                        spl.t[n] - spl.t[spl.k]
+                    )
+                else:
+                    x = X[:, i]
 
                 if use_sparse:
-                    # We replace the nan values in the input column by some
-                    # arbitrary, in-range, numerical value since
-                    # BSpline.design_matrix() would otherwise raise on any nan
-                    # value in its input. The spline encoded values in
-                    # the output of that function that correspond to missing
-                    # values in the original input will be replaced by 0.0
-                    # afterwards.
-                    #
-                    # Note that in the following we use np.nanmin(x) as the
-                    # input replacement to make sure that this code works even
-                    # when `extrapolation == "error"`. Any other choice of
-                    # in-range value would have worked work since the
-                    # corresponding values in the array are replaced by zeros.
-                    if nan_row_indices.size == x.size:
-                        # The column is all np.nan valued. Replace it by a
-                        # constant column with an arbitrary non-nan value
-                        # inside so that it is encoded as constant column.
-                        x = np.zeros_like(x)  # avoid mutation of input data
-                    elif nan_row_indices.shape[0] > 0:
-                        x = x.copy()  # avoid mutation of input data
-                        x[nan_row_indices] = np.nanmin(x)
                     XBS_sparse = BSpline.design_matrix(
                         x, spl.t, spl.k, **kwargs_extrapolate
                     )
-
                     if self.extrapolation == "periodic":
                         # See the construction of coef in fit. We need to add the last
                         # degree spline basis function to the first degree ones and
@@ -1110,113 +1034,72 @@ def transform(self, X):
                         XBS_sparse = XBS_sparse.tolil()
                         XBS_sparse[:, :degree] += XBS_sparse[:, -degree:]
                         XBS_sparse = XBS_sparse[:, :-degree]
-
-                    if nan_row_indices.shape[0] > 0:
-                        # Note: See comment about SparseEfficiencyWarning below.
-                        XBS = XBS_sparse.tolil()
-
                 else:
-                    XBS[
-                        :, (feature_idx * n_splines) : ((feature_idx + 1) * n_splines)
-                    ] = spl(x)
-
-                # Replace any indicated values with 0:
-                if nan_row_indices.shape[0] > 0:
-                    for spline_idx in range(n_splines):
-                        output_feature_idx = n_splines * feature_idx + spline_idx
-                        XBS[
-                            nan_row_indices, output_feature_idx : output_feature_idx + 1
-                        ] = 0
-                    if use_sparse:
-                        XBS_sparse = XBS
-
+                    XBS[:, (i * n_splines) : ((i + 1) * n_splines)] = spl(x)
             else:  # extrapolation in ("constant", "linear")
                 xmin, xmax = spl.t[degree], spl.t[-degree - 1]
                 # spline values at boundaries
                 f_min, f_max = spl(xmin), spl(xmax)
-                # Values outside of the feature range during fit and nan values get
-                # filtered out:
-                inside_range_mask = (xmin <= X[:, feature_idx]) & (
-                    X[:, feature_idx] <= xmax
-                )
-
+                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)
                 if use_sparse:
-                    outside_range_mask = ~inside_range_mask
-                    x = X[:, feature_idx].copy()
-                    # Set to some arbitrary value within the range of values
-                    # observed on the training set before calling
-                    # BSpline.design_matrix. Those transformed will be
-                    # reassigned later when handling with extrapolation.
-                    x[outside_range_mask] = xmin
+                    mask_inv = ~mask
+                    x = X[:, i].copy()
+                    # Set some arbitrary values outside boundary that will be reassigned
+                    # later.
+                    x[mask_inv] = spl.t[self.degree]
                     XBS_sparse = BSpline.design_matrix(x, spl.t, spl.k)
                     # Note: Without converting to lil_matrix we would get:
                     # scipy.sparse._base.SparseEfficiencyWarning: Changing the sparsity
                     # structure of a csr_matrix is expensive. lil_matrix is more
                     # efficient.
-                    if np.any(outside_range_mask):
+                    if np.any(mask_inv):
                         XBS_sparse = XBS_sparse.tolil()
-                        XBS_sparse[outside_range_mask, :] = 0
-
+                        XBS_sparse[mask_inv, :] = 0
                 else:
-                    XBS[
-                        inside_range_mask,
-                        (feature_idx * n_splines) : ((feature_idx + 1) * n_splines),
-                    ] = spl(X[inside_range_mask, feature_idx])
+                    XBS[mask, (i * n_splines) : ((i + 1) * n_splines)] = spl(X[mask, i])
 
             # Note for extrapolation:
             # 'continue' is already returned as is by scipy BSplines
             if self.extrapolation == "error":
-                has_nan_output_values = False
-                if use_sparse:
-                    # Early convert to CSR as the sparsity structure of this
-                    # block should not change anymore. This is needed to be able
-                    # to safely assume that `.data` is a 1D array.
-                    XBS_sparse = XBS_sparse.tocsr()
-                    has_nan_output_values = np.any(np.isnan(XBS_sparse.data))
-                else:
-                    output_features = slice(
-                        feature_idx * n_splines, (feature_idx + 1) * n_splines
+                # BSpline with extrapolate=False does not raise an error, but
+                # outputs np.nan.
+                if (use_sparse and np.any(np.isnan(XBS_sparse.data))) or (
+                    not use_sparse
+                    and np.any(
+                        np.isnan(XBS[:, (i * n_splines) : ((i + 1) * n_splines)])
                     )
-                    has_nan_output_values = np.any(np.isnan(XBS[:, output_features]))
-
-                if has_nan_output_values:
+                ):
                     raise ValueError(
-                        "`X` contains values beyond the limits of the knots."
+                        "X contains values beyond the limits of the knots."
                     )
-
             elif self.extrapolation == "constant":
                 # Set all values beyond xmin and xmax to the value of the
                 # spline basis functions at those two positions.
                 # Only the first degree and last degree number of splines
                 # have non-zero values at the boundaries.
 
-                below_xmin_mask = X[:, feature_idx] < xmin
-                if np.any(below_xmin_mask):
+                mask = X[:, i] < xmin
+                if np.any(mask):
                     if use_sparse:
                         # Note: See comment about SparseEfficiencyWarning above.
                         XBS_sparse = XBS_sparse.tolil()
-                        XBS_sparse[below_xmin_mask, :degree] = f_min[:degree]
+                        XBS_sparse[mask, :degree] = f_min[:degree]
 
                     else:
-                        XBS[
-                            below_xmin_mask,
-                            (feature_idx * n_splines) : (
-                                feature_idx * n_splines + degree
-                            ),
-                        ] = f_min[:degree]
-
-                above_xmax_mask = X[:, feature_idx] > xmax
-                if np.any(above_xmax_mask):
+                        XBS[mask, (i * n_splines) : (i * n_splines + degree)] = f_min[
+                            :degree
+                        ]
+
+                mask = X[:, i] > xmax
+                if np.any(mask):
                     if use_sparse:
                         # Note: See comment about SparseEfficiencyWarning above.
                         XBS_sparse = XBS_sparse.tolil()
-                        XBS_sparse[above_xmax_mask, -degree:] = f_max[-degree:]
+                        XBS_sparse[mask, -degree:] = f_max[-degree:]
                     else:
                         XBS[
-                            above_xmax_mask,
-                            ((feature_idx + 1) * n_splines - degree) : (
-                                (feature_idx + 1) * n_splines
-                            ),
+                            mask,
+                            ((i + 1) * n_splines - degree) : ((i + 1) * n_splines),
                         ] = f_max[-degree:]
 
             elif self.extrapolation == "linear":
@@ -1234,38 +1117,26 @@ def transform(self, X):
                     # boundary. For degree=0 it is the same as 'constant'.
                     degree += 1
                 for j in range(degree):
-                    below_xmin_mask = X[:, feature_idx] < xmin
-                    if np.any(below_xmin_mask):
-                        linear_extr = (
-                            f_min[j]
-                            + (X[below_xmin_mask, feature_idx] - xmin) * fp_min[j]
-                        )
+                    mask = X[:, i] < xmin
+                    if np.any(mask):
+                        linear_extr = f_min[j] + (X[mask, i] - xmin) * fp_min[j]
                         if use_sparse:
                             # Note: See comment about SparseEfficiencyWarning above.
                             XBS_sparse = XBS_sparse.tolil()
-                            XBS_sparse[below_xmin_mask, j] = linear_extr
+                            XBS_sparse[mask, j] = linear_extr
                         else:
-                            XBS[below_xmin_mask, feature_idx * n_splines + j] = (
-                                linear_extr
-                            )
+                            XBS[mask, i * n_splines + j] = linear_extr
 
-                    above_xmax_mask = X[:, feature_idx] > xmax
-                    if np.any(above_xmax_mask):
+                    mask = X[:, i] > xmax
+                    if np.any(mask):
                         k = n_splines - 1 - j
-                        linear_extr = (
-                            f_max[k]
-                            + (X[above_xmax_mask, feature_idx] - xmax) * fp_max[k]
-                        )
+                        linear_extr = f_max[k] + (X[mask, i] - xmax) * fp_max[k]
                         if use_sparse:
                             # Note: See comment about SparseEfficiencyWarning above.
                             XBS_sparse = XBS_sparse.tolil()
-                            XBS_sparse[above_xmax_mask, k : k + 1] = linear_extr[
-                                :, None
-                            ]
+                            XBS_sparse[mask, k : k + 1] = linear_extr[:, None]
                         else:
-                            XBS[above_xmax_mask, feature_idx * n_splines + k] = (
-                                linear_extr
-                            )
+                            XBS[mask, i * n_splines + k] = linear_extr
 
             if use_sparse:
                 XBS_sparse = XBS_sparse.tocsr()
@@ -1296,8 +1167,7 @@ def transform(self, X):
                 )
             XBS = sparse.hstack(output_list, format="csr")
         elif self.sparse_output:
-            # TODO: Remove conversion to csr, once scipy 1.10 is the minimum version:
-            # Adjust format of XBS to sparse, for scipy versions < 1.10.0:
+            # TODO: Remove ones scipy 1.10 is the minimum version. See comments above.
             XBS = sparse.csr_matrix(XBS)
 
         if self.include_bias:
@@ -1307,8 +1177,3 @@ def transform(self, X):
             # We chose the last one.
             indices = [j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0]
             return XBS[:, indices]
-
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.allow_nan = self.handle_missing == "zeros"
-        return tags

From dd328c553ce1d0daf11be995ce7fca7ba1e2027f Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 26 Jun 2025 14:22:29 +0500
Subject: [PATCH 16/24] Sync with main

---
 sklearn/preprocessing/_polynomial.py | 263 ++++++++++++++++++++-------
 1 file changed, 199 insertions(+), 64 deletions(-)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index a830b9c3428b0..fcfa502ae9ba5 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -23,6 +23,7 @@
 
 from ..base import BaseEstimator, TransformerMixin, _fit_context
 from ..utils import check_array
+from ..utils._mask import _get_mask
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.fixes import parse_version, sp_version
 from ..utils.stats import _weighted_percentile
@@ -664,6 +665,20 @@ class SplineTransformer(TransformerMixin, BaseEstimator):
         Order of output array in the dense case. `'F'` order is faster to compute, but
         may slow down subsequent estimators.
 
+    handle_missing : {'error', 'zeros'}, default='error'
+        Specifies the way missing values are handled.
+
+        - 'error' : Raise an error if `np.nan` values are present during :meth:`fit`.
+        - 'zeros' : Encode splines of missing values with values `0`.
+
+        Note that `handle_missing='zeros'` differs from first imputing missing values
+        with zeros and then creating the spline basis. The latter creates spline basis
+        functions which have non-zero values at the missing values
+        whereas this option simply sets all spline basis function values to zero at the
+        missing values.
+
+        .. versionadded:: 1.8
+
     sparse_output : bool, default=False
         Will return sparse CSR matrix if set True else will return an array.
 
@@ -730,6 +745,7 @@ class SplineTransformer(TransformerMixin, BaseEstimator):
         ],
         "include_bias": ["boolean"],
         "order": [StrOptions({"C", "F"})],
+        "handle_missing": [StrOptions({"error", "zeros"})],
         "sparse_output": ["boolean"],
     }
 
@@ -742,6 +758,7 @@ def __init__(
         extrapolation="constant",
         include_bias=True,
         order="C",
+        handle_missing="error",
         sparse_output=False,
     ):
         self.n_knots = n_knots
@@ -750,11 +767,12 @@ def __init__(
         self.extrapolation = extrapolation
         self.include_bias = include_bias
         self.order = order
+        self.handle_missing = handle_missing
         self.sparse_output = sparse_output
 
     @staticmethod
     def _get_base_knot_positions(X, n_knots=10, knots="uniform", sample_weight=None):
-        """Calculate base knot positions.
+        """Calculate base knot positions for `knots` either "uniform" or "quantile".
 
         Base knots such that first knot <= feature <= last knot. For the
         B-spline construction with scipy.interpolate.BSpline, 2*degree knots
@@ -771,7 +789,7 @@ def _get_base_knot_positions(X, n_knots=10, knots="uniform", sample_weight=None)
             )
 
             if sample_weight is None:
-                knots = np.percentile(X, percentile_ranks, axis=0)
+                knots = np.nanpercentile(X, percentile_ranks, axis=0)
             else:
                 knots = np.array(
                     [
@@ -786,8 +804,15 @@ def _get_base_knot_positions(X, n_knots=10, knots="uniform", sample_weight=None)
             # `else` is therefore safe.
             # Disregard observations with zero weight.
             mask = slice(None, None, 1) if sample_weight is None else sample_weight > 0
-            x_min = np.amin(X[mask], axis=0)
-            x_max = np.amax(X[mask], axis=0)
+            x_min = np.zeros(X.shape[1], dtype=np.float64)
+            x_max = np.zeros(X.shape[1], dtype=np.float64)
+            for feature_idx in range(X.shape[1]):
+                x = X[mask, feature_idx]
+                if np.all(np.isnan(x)):
+                    continue
+                else:
+                    x_min[feature_idx] = np.nanmin(x)
+                    x_max[feature_idx] = np.nanmax(x)
 
             knots = np.linspace(
                 start=x_min,
@@ -851,14 +876,26 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted transformer.
         """
-        X = validate_data(
-            self,
-            X,
-            reset=True,
-            accept_sparse=False,
-            ensure_min_samples=2,
-            ensure_2d=True,
-        )
+        try:
+            X = validate_data(
+                self,
+                X,
+                reset=True,
+                accept_sparse=False,
+                ensure_min_samples=2,
+                ensure_2d=True,
+                ensure_all_finite=(self.handle_missing != "zeros"),
+            )
+        except ValueError as e:
+            if "Input X contains NaN." in str(e) and self.handle_missing == "error":
+                raise ValueError(
+                    "Input X contains NaN values and `SplineTransformer` is configured "
+                    "to error in this case (handle_missing='error'). To avoid this "
+                    "error, set handle_missing='zeros' to encode missing values as "
+                    "splines with value 0 or ensure no missing values in X."
+                ) from e
+            raise e
+
         if sample_weight is not None:
             sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
 
@@ -866,7 +903,10 @@ def fit(self, X, y=None, sample_weight=None):
 
         if isinstance(self.knots, str):
             base_knots = self._get_base_knot_positions(
-                X, n_knots=self.n_knots, knots=self.knots, sample_weight=sample_weight
+                X,
+                n_knots=self.n_knots,
+                knots=self.knots,
+                sample_weight=sample_weight,
             )
         else:
             base_knots = check_array(self.knots, dtype=np.float64)
@@ -974,14 +1014,21 @@ def transform(self, X):
         """
         check_is_fitted(self)
 
-        X = validate_data(self, X, reset=False, accept_sparse=False, ensure_2d=True)
+        X = validate_data(
+            self,
+            X,
+            reset=False,
+            accept_sparse=False,
+            ensure_2d=True,
+            ensure_all_finite=(self.handle_missing != "zeros"),
+        )
 
         n_samples, n_features = X.shape
         n_splines = self.bsplines_[0].c.shape[1]
         degree = self.degree
 
         # TODO: Remove this condition, once scipy 1.10 is the minimum version.
-        #       Only scipy => 1.10 supports design_matrix(.., extrapolate=..).
+        #       Only scipy >= 1.10 supports design_matrix(.., extrapolate=..).
         #       The default (implicit in scipy < 1.10) is extrapolate=False.
         scipy_1_10 = sp_version >= parse_version("1.10.0")
         # Note: self.bsplines_[0].extrapolate is True for extrapolation in
@@ -1005,8 +1052,10 @@ def transform(self, X):
         else:
             XBS = np.zeros((n_samples, n_out), dtype=dtype, order=self.order)
 
-        for i in range(n_features):
-            spl = self.bsplines_[i]
+        for feature_idx in range(n_features):
+            spl = self.bsplines_[feature_idx]
+            # Get indicator for nan values in the current column.
+            nan_row_indices = np.flatnonzero(_get_mask(X[:, feature_idx], np.nan))
 
             if self.extrapolation in ("continue", "error", "periodic"):
                 if self.extrapolation == "periodic":
@@ -1015,17 +1064,44 @@ def transform(self, X):
                     # This is equivalent to BSpline(.., extrapolate="periodic")
                     # for scipy>=1.0.0.
                     n = spl.t.size - spl.k - 1
-                    # Assign to new array to avoid inplace operation
-                    x = spl.t[spl.k] + (X[:, i] - spl.t[spl.k]) % (
-                        spl.t[n] - spl.t[spl.k]
-                    )
-                else:
-                    x = X[:, i]
+                    if spl.t[n] - spl.t[spl.k] > 0:
+                        # Assign to new array to avoid inplace operation
+                        x = spl.t[spl.k] + (X[:, feature_idx] - spl.t[spl.k]) % (
+                            spl.t[n] - spl.t[spl.k]
+                        )
+                    else:
+                        # This can happen if the column has a single non-nan
+                        # value. Treat as a constant feature.
+                        x = np.zeros_like(X[:, feature_idx])
+                else:  # self.extrapolation in ("continue", "error")
+                    x = X[:, feature_idx]
 
                 if use_sparse:
+                    # We replace the nan values in the input column by some
+                    # arbitrary, in-range, numerical value since
+                    # BSpline.design_matrix() would otherwise raise on any nan
+                    # value in its input. The spline encoded values in
+                    # the output of that function that correspond to missing
+                    # values in the original input will be replaced by 0.0
+                    # afterwards.
+                    #
+                    # Note that in the following we use np.nanmin(x) as the
+                    # input replacement to make sure that this code works even
+                    # when `extrapolation == "error"`. Any other choice of
+                    # in-range value would have worked since the
+                    # corresponding values in the array are replaced by zeros.
+                    if nan_row_indices.size == x.size:
+                        # The column is all np.nan valued. Replace it by a
+                        # constant column with an arbitrary non-nan value
+                        # inside so that it is encoded as constant column.
+                        x = np.zeros_like(x)  # avoid mutation of input data
+                    elif nan_row_indices.shape[0] > 0:
+                        x = x.copy()  # avoid mutation of input data
+                        x[nan_row_indices] = np.nanmin(x)
                     XBS_sparse = BSpline.design_matrix(
                         x, spl.t, spl.k, **kwargs_extrapolate
                     )
+
                     if self.extrapolation == "periodic":
                         # See the construction of coef in fit. We need to add the last
                         # degree spline basis function to the first degree ones and
@@ -1034,72 +1110,113 @@ def transform(self, X):
                         XBS_sparse = XBS_sparse.tolil()
                         XBS_sparse[:, :degree] += XBS_sparse[:, -degree:]
                         XBS_sparse = XBS_sparse[:, :-degree]
+
+                    if nan_row_indices.shape[0] > 0:
+                        # Note: See comment about SparseEfficiencyWarning below.
+                        XBS = XBS_sparse.tolil()
+
                 else:
-                    XBS[:, (i * n_splines) : ((i + 1) * n_splines)] = spl(x)
+                    XBS[
+                        :, (feature_idx * n_splines) : ((feature_idx + 1) * n_splines)
+                    ] = spl(x)
+
+                # Set the spline values of rows with nan input to 0:
+                if nan_row_indices.shape[0] > 0:
+                    for spline_idx in range(n_splines):
+                        output_feature_idx = n_splines * feature_idx + spline_idx
+                        XBS[
+                            nan_row_indices, output_feature_idx : output_feature_idx + 1
+                        ] = 0
+                    if use_sparse:
+                        XBS_sparse = XBS
+
             else:  # extrapolation in ("constant", "linear")
                 xmin, xmax = spl.t[degree], spl.t[-degree - 1]
                 # spline values at boundaries
                 f_min, f_max = spl(xmin), spl(xmax)
-                mask = (xmin <= X[:, i]) & (X[:, i] <= xmax)
+                # Values outside of the feature range during fit and nan values get
+                # filtered out:
+                inside_range_mask = (xmin <= X[:, feature_idx]) & (
+                    X[:, feature_idx] <= xmax
+                )
+
                 if use_sparse:
-                    mask_inv = ~mask
-                    x = X[:, i].copy()
-                    # Set some arbitrary values outside boundary that will be reassigned
-                    # later.
-                    x[mask_inv] = spl.t[self.degree]
+                    outside_range_mask = ~inside_range_mask
+                    x = X[:, feature_idx].copy()
+                    # Set to some arbitrary value within the range of values
+                    # observed on the training set before calling
+                    # BSpline.design_matrix. Those transformed values will be
+                    # reassigned later when handling extrapolation.
+                    x[outside_range_mask] = xmin
                     XBS_sparse = BSpline.design_matrix(x, spl.t, spl.k)
                     # Note: Without converting to lil_matrix we would get:
                     # scipy.sparse._base.SparseEfficiencyWarning: Changing the sparsity
                     # structure of a csr_matrix is expensive. lil_matrix is more
                     # efficient.
-                    if np.any(mask_inv):
+                    if np.any(outside_range_mask):
                         XBS_sparse = XBS_sparse.tolil()
-                        XBS_sparse[mask_inv, :] = 0
+                        XBS_sparse[outside_range_mask, :] = 0
+
                 else:
-                    XBS[mask, (i * n_splines) : ((i + 1) * n_splines)] = spl(X[mask, i])
+                    XBS[
+                        inside_range_mask,
+                        (feature_idx * n_splines) : ((feature_idx + 1) * n_splines),
+                    ] = spl(X[inside_range_mask, feature_idx])
 
             # Note for extrapolation:
             # 'continue' is already returned as is by scipy BSplines
             if self.extrapolation == "error":
-                # BSpline with extrapolate=False does not raise an error, but
-                # outputs np.nan.
-                if (use_sparse and np.any(np.isnan(XBS_sparse.data))) or (
-                    not use_sparse
-                    and np.any(
-                        np.isnan(XBS[:, (i * n_splines) : ((i + 1) * n_splines)])
+                has_nan_output_values = False
+                if use_sparse:
+                    # Early convert to CSR as the sparsity structure of this
+                    # block should not change anymore. This is needed to be able
+                    # to safely assume that `.data` is a 1D array.
+                    XBS_sparse = XBS_sparse.tocsr()
+                    has_nan_output_values = np.any(np.isnan(XBS_sparse.data))
+                else:
+                    output_features = slice(
+                        feature_idx * n_splines, (feature_idx + 1) * n_splines
                     )
-                ):
+                    has_nan_output_values = np.any(np.isnan(XBS[:, output_features]))
+
+                if has_nan_output_values:
                     raise ValueError(
-                        "X contains values beyond the limits of the knots."
+                        "`X` contains values beyond the limits of the knots."
                     )
+
             elif self.extrapolation == "constant":
                 # Set all values beyond xmin and xmax to the value of the
                 # spline basis functions at those two positions.
                 # Only the first degree and last degree number of splines
                 # have non-zero values at the boundaries.
 
-                mask = X[:, i] < xmin
-                if np.any(mask):
+                below_xmin_mask = X[:, feature_idx] < xmin
+                if np.any(below_xmin_mask):
                     if use_sparse:
                         # Note: See comment about SparseEfficiencyWarning above.
                         XBS_sparse = XBS_sparse.tolil()
-                        XBS_sparse[mask, :degree] = f_min[:degree]
+                        XBS_sparse[below_xmin_mask, :degree] = f_min[:degree]
 
                     else:
-                        XBS[mask, (i * n_splines) : (i * n_splines + degree)] = f_min[
-                            :degree
-                        ]
-
-                mask = X[:, i] > xmax
-                if np.any(mask):
+                        XBS[
+                            below_xmin_mask,
+                            (feature_idx * n_splines) : (
+                                feature_idx * n_splines + degree
+                            ),
+                        ] = f_min[:degree]
+
+                above_xmax_mask = X[:, feature_idx] > xmax
+                if np.any(above_xmax_mask):
                     if use_sparse:
                         # Note: See comment about SparseEfficiencyWarning above.
                         XBS_sparse = XBS_sparse.tolil()
-                        XBS_sparse[mask, -degree:] = f_max[-degree:]
+                        XBS_sparse[above_xmax_mask, -degree:] = f_max[-degree:]
                     else:
                         XBS[
-                            mask,
-                            ((i + 1) * n_splines - degree) : ((i + 1) * n_splines),
+                            above_xmax_mask,
+                            ((feature_idx + 1) * n_splines - degree) : (
+                                (feature_idx + 1) * n_splines
+                            ),
                         ] = f_max[-degree:]
 
             elif self.extrapolation == "linear":
@@ -1117,26 +1234,38 @@ def transform(self, X):
                     # boundary. For degree=0 it is the same as 'constant'.
                     degree += 1
                 for j in range(degree):
-                    mask = X[:, i] < xmin
-                    if np.any(mask):
-                        linear_extr = f_min[j] + (X[mask, i] - xmin) * fp_min[j]
+                    below_xmin_mask = X[:, feature_idx] < xmin
+                    if np.any(below_xmin_mask):
+                        linear_extr = (
+                            f_min[j]
+                            + (X[below_xmin_mask, feature_idx] - xmin) * fp_min[j]
+                        )
                         if use_sparse:
                             # Note: See comment about SparseEfficiencyWarning above.
                             XBS_sparse = XBS_sparse.tolil()
-                            XBS_sparse[mask, j] = linear_extr
+                            XBS_sparse[below_xmin_mask, j] = linear_extr
                         else:
-                            XBS[mask, i * n_splines + j] = linear_extr
+                            XBS[below_xmin_mask, feature_idx * n_splines + j] = (
+                                linear_extr
+                            )
 
-                    mask = X[:, i] > xmax
-                    if np.any(mask):
+                    above_xmax_mask = X[:, feature_idx] > xmax
+                    if np.any(above_xmax_mask):
                         k = n_splines - 1 - j
-                        linear_extr = f_max[k] + (X[mask, i] - xmax) * fp_max[k]
+                        linear_extr = (
+                            f_max[k]
+                            + (X[above_xmax_mask, feature_idx] - xmax) * fp_max[k]
+                        )
                         if use_sparse:
                             # Note: See comment about SparseEfficiencyWarning above.
                             XBS_sparse = XBS_sparse.tolil()
-                            XBS_sparse[mask, k : k + 1] = linear_extr[:, None]
+                            XBS_sparse[above_xmax_mask, k : k + 1] = linear_extr[
+                                :, None
+                            ]
                         else:
-                            XBS[mask, i * n_splines + k] = linear_extr
+                            XBS[above_xmax_mask, feature_idx * n_splines + k] = (
+                                linear_extr
+                            )
 
             if use_sparse:
                 XBS_sparse = XBS_sparse.tocsr()
@@ -1167,7 +1296,8 @@ def transform(self, X):
                 )
             XBS = sparse.hstack(output_list, format="csr")
         elif self.sparse_output:
-            # TODO: Remove ones scipy 1.10 is the minimum version. See comments above.
+            # TODO: Remove conversion to csr, once scipy 1.10 is the minimum version:
+            # Adjust format of XBS to sparse, for scipy versions < 1.10.0:
             XBS = sparse.csr_matrix(XBS)
 
         if self.include_bias:
@@ -1177,3 +1307,8 @@ def transform(self, X):
             # We chose the last one.
             indices = [j for j in range(XBS.shape[1]) if (j + 1) % n_splines != 0]
             return XBS[:, indices]
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.allow_nan = self.handle_missing == "zeros"
+        return tags

From 5618b80d4ac5670156074e9094e734891a4a1a9c Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Thu, 26 Jun 2025 14:25:45 +0500
Subject: [PATCH 17/24] Add desc in test

---
 sklearn/preprocessing/tests/test_polynomial.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 35efcda69f694..dd521bc08b5ca 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1414,6 +1414,8 @@ def test_polynomial_features_array_api_compliance(
 def test_polynomial_features_array_api_raises_on_order_F(
     array_namespace, device_, dtype_name
 ):
+    """Test that PolynomialFeatures with order='F' raises AttributeError on
+    array API namespaces other than numpy."""
     xp = _array_api_for_tests(array_namespace, device_)
     X = np.arange(6).reshape((3, 2)).astype(dtype_name)
     X_xp = xp.asarray(X, device=device_)

From 95894a1e44ce13c19fc78a39eb21ae2b3305fc9b Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 27 Jun 2025 11:34:19 +0500
Subject: [PATCH 18/24] Address PR suggestions

---
 sklearn/preprocessing/_polynomial.py          |  3 +-
 .../preprocessing/tests/test_polynomial.py    | 29 +++----------------
 2 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index fcfa502ae9ba5..d30243d206985 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -508,7 +508,7 @@ def transform(self, X):
                 order_kwargs["order"] = self.order
             elif self.order == "F":
                 raise AttributeError(
-                    "PolynomialFeatures does not support order='F' for the array API"
+                    "PolynomialFeatures does not support order='F' for non-numpy arrays"
                 )
 
             XP = xp.empty(
@@ -562,6 +562,7 @@ def transform(self, X):
                     # XP[:, start:end] are terms of degree d - 1
                     # that exclude feature #feature_idx.
                     if _is_numpy_namespace(xp):
+                        # numpy performs this multiplication in place
                         np.multiply(
                             XP[:, start:end],
                             X[:, feature_idx : feature_idx + 1],
diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index dd521bc08b5ca..c5ebbde7788f8 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1353,30 +1353,9 @@ def test_csr_polynomial_expansion_windows_fail(csr_container):
     yield_namespace_device_dtype_combinations(),
     ids=_get_namespace_device_dtype_ids,
 )
-@pytest.mark.parametrize(
-    "degree, include_bias, interaction_only",
-    [
-        (2, True, False),
-        (2, False, False),
-        (2, True, True),
-        (2, False, True),
-        ((2, 2), True, False),
-        ((2, 2), False, False),
-        ((2, 2), True, True),
-        ((2, 2), False, True),
-        (3, True, False),
-        (3, False, False),
-        (3, True, True),
-        (3, False, True),
-        ((2, 3), True, False),
-        ((2, 3), False, False),
-        ((2, 3), True, True),
-        ((2, 3), False, True),
-        ((3, 3), True, False),
-        ((3, 3), False, False),
-        ((3, 3), True, True),
-    ],
-)
+@pytest.mark.parametrize("interaction_only", [True, False])
+@pytest.mark.parametrize("include_bias", [True, False])
+@pytest.mark.parametrize("degree", [2, (2, 2), 3, (3, 3)])
 def test_polynomial_features_array_api_compliance(
     two_features_degree3,
     degree,
@@ -1419,7 +1398,7 @@ def test_polynomial_features_array_api_raises_on_order_F(
     xp = _array_api_for_tests(array_namespace, device_)
     X = np.arange(6).reshape((3, 2)).astype(dtype_name)
     X_xp = xp.asarray(X, device=device_)
-    msg = "PolynomialFeatures does not support order='F' for the array API"
+    msg = "PolynomialFeatures does not support order='F' for non-numpy arrays"
     with config_context(array_api_dispatch=True):
         pf = PolynomialFeatures(order="F").fit(X_xp)
         if _is_numpy_namespace(xp):  # Numpy should not raise

From 789aa09a9804bebc8b0746519816e73e4824a116 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 27 Jun 2025 11:50:05 +0500
Subject: [PATCH 19/24] Assert dtype

---
 sklearn/preprocessing/tests/test_polynomial.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index c5ebbde7788f8..6855eacaf6492 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1369,7 +1369,7 @@ def test_polynomial_features_array_api_compliance(
     xp = _array_api_for_tests(array_namespace, device_)
     X, _ = two_features_degree3
     X_np = X.astype(dtype_name)
-    X_xp = xp.asarray(X, device=device_)
+    X_xp = xp.asarray(X_np, device=device_)
     with config_context(array_api_dispatch=True):
         tf_np = PolynomialFeatures(
             degree=degree, include_bias=include_bias, interaction_only=interaction_only
@@ -1383,6 +1383,7 @@ def test_polynomial_features_array_api_compliance(
         assert_allclose(_convert_to_numpy(out_xp, xp=xp), out_np)
         assert get_namespace(out_xp)[0].__name__ == xp.__name__
         assert device(out_xp) == device(X_xp)
+        assert out_xp.dtype == X_xp.dtype
 
 
 @pytest.mark.parametrize(

From 8ec11ef4d18bfed44b73cc32b5983a50d7b342ab Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Tue, 1 Jul 2025 10:49:25 +0500
Subject: [PATCH 20/24] Add a test for supported_float_types

---
 sklearn/utils/tests/test_array_api.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index ba0b63c6efd01..33b7928169478 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -33,6 +33,7 @@
     get_namespace_and_device,
     indexing_dtype,
     np_compat,
+    supported_float_dtypes,
     yield_namespace_device_dtype_combinations,
 )
 from sklearn.utils._testing import (
@@ -777,3 +778,24 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
         res_xp_2 = _logsumexp(array_xp_2, axis=axis)
         res_xp_2 = _convert_to_numpy(res_xp_2, xp)
         assert_allclose(res_np_2, res_xp_2, rtol=rtol)
+
+
+@pytest.mark.parametrize(
+    ("namespace", "device_", "expected_types"),
+    [
+        ("numpy", None, ("float64", "float32", "<class 'numpy.float16'>")),
+        (
+            "array_api_strict",
+            None,
+            ("array_api_strict.float64", "array_api_strict.float32"),
+        ),
+        ("torch", "cpu", ("torch.float64", "torch.float32", "torch.float16")),
+        ("torch", "cuda", ("torch.float64", "torch.float32", "torch.float16")),
+        ("torch", "mps", ("torch.float32", "torch.float16")),
+    ],
+)
+def test_supported_float_types(namespace, device_, expected_types):
+    xp = _array_api_for_tests(namespace, device_)
+    float_types = supported_float_dtypes(xp, device=device_)
+    float_types = tuple(str(float_type) for float_type in float_types)
+    assert float_types == expected_types

From ef7e9ee8132aa78c60a13c77e0e811c03983b00b Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Tue, 1 Jul 2025 13:13:27 +0500
Subject: [PATCH 21/24] Improve the checking of dtypes rather than str in the
 test for supported float types

---
 sklearn/utils/tests/test_array_api.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py
index 33b7928169478..c430b7d13a792 100644
--- a/sklearn/utils/tests/test_array_api.py
+++ b/sklearn/utils/tests/test_array_api.py
@@ -783,19 +783,15 @@ def test_logsumexp_like_scipy_logsumexp(array_namespace, device_, dtype_name, ax
 @pytest.mark.parametrize(
     ("namespace", "device_", "expected_types"),
     [
-        ("numpy", None, ("float64", "float32", "<class 'numpy.float16'>")),
-        (
-            "array_api_strict",
-            None,
-            ("array_api_strict.float64", "array_api_strict.float32"),
-        ),
-        ("torch", "cpu", ("torch.float64", "torch.float32", "torch.float16")),
-        ("torch", "cuda", ("torch.float64", "torch.float32", "torch.float16")),
-        ("torch", "mps", ("torch.float32", "torch.float16")),
+        ("numpy", None, ("float64", "float32", "float16")),
+        ("array_api_strict", None, ("float64", "float32")),
+        ("torch", "cpu", ("float64", "float32", "float16")),
+        ("torch", "cuda", ("float64", "float32", "float16")),
+        ("torch", "mps", ("float32", "float16")),
     ],
 )
 def test_supported_float_types(namespace, device_, expected_types):
     xp = _array_api_for_tests(namespace, device_)
     float_types = supported_float_dtypes(xp, device=device_)
-    float_types = tuple(str(float_type) for float_type in float_types)
-    assert float_types == expected_types
+    expected = tuple(getattr(xp, dtype_name) for dtype_name in expected_types)
+    assert float_types == expected

From 4804b43f23d390307ecff156a1d36a6aa0e7c9ab Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman2007@gmail.com>
Date: Tue, 1 Jul 2025 13:59:36 +0500
Subject: [PATCH 22/24] Update sklearn/preprocessing/_polynomial.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/preprocessing/_polynomial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index d30243d206985..701a578bffcdd 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -507,7 +507,7 @@ def transform(self, X):
             if _is_numpy_namespace(xp=xp):
                 order_kwargs["order"] = self.order
             elif self.order == "F":
-                raise AttributeError(
+                raise ValueError(
                     "PolynomialFeatures does not support order='F' for non-numpy arrays"
                 )
 

From ebbf4f85bc3e5f198c4cd662cd0097acb3717431 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman2007@gmail.com>
Date: Tue, 1 Jul 2025 13:59:45 +0500
Subject: [PATCH 23/24] Update sklearn/preprocessing/tests/test_polynomial.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/preprocessing/tests/test_polynomial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 6855eacaf6492..99cf9ad0bb7b1 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1394,7 +1394,7 @@ def test_polynomial_features_array_api_compliance(
 def test_polynomial_features_array_api_raises_on_order_F(
     array_namespace, device_, dtype_name
 ):
-    """Test that PolynomialFeatures with order='F' raises AttributeError on
+    """Test that PolynomialFeatures with order='F' raises ValueError on
     array API namespaces other than numpy."""
     xp = _array_api_for_tests(array_namespace, device_)
     X = np.arange(6).reshape((3, 2)).astype(dtype_name)

From 8e488f016db9e25e1f2496c154b2bd0f1995c8cb Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman2007@gmail.com>
Date: Tue, 1 Jul 2025 13:59:52 +0500
Subject: [PATCH 24/24] Update sklearn/preprocessing/tests/test_polynomial.py

Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 sklearn/preprocessing/tests/test_polynomial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 99cf9ad0bb7b1..fee34b0aefccd 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1405,5 +1405,5 @@ def test_polynomial_features_array_api_raises_on_order_F(
         if _is_numpy_namespace(xp):  # Numpy should not raise
             pf.transform(X_xp)
         else:
-            with pytest.raises(AttributeError, match=msg):
+            with pytest.raises(ValueError, match=msg):
                 pf.transform(X_xp)