
Commit 48cba5a

FEA Make standard scaler compatible to Array API (#27113)

Authored by: AlexanderFabisch, EdAbati, ogrisel, Charles Hill, and OmarManzoor

Co-authored-by: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
Co-authored-by: Charles Hill <charles.hill@etegent.com>
Co-authored-by: Omar Salman <omar.salman@arbisoft.com>
Co-authored-by: Loïc Estève <loic.esteve@ymail.com>

1 parent 872be3c · commit 48cba5a

File tree

9 files changed: +318 −54 lines

doc/modules/array_api.rst

Lines changed: 3 additions & 1 deletion

@@ -123,6 +123,7 @@ Estimators
 - :class:`preprocessing.MinMaxScaler`
 - :class:`preprocessing.Normalizer`
 - :class:`preprocessing.PolynomialFeatures`
+- :class:`preprocessing.StandardScaler` (see :ref:`device_support_for_float64`)
 - :class:`mixture.GaussianMixture` (with `init_params="random"` or
   `init_params="random_from_data"` and `warm_start=False`)

@@ -329,7 +330,8 @@ Note on device support for ``float64``

 Certain operations within scikit-learn will automatically perform operations
 on floating-point values with `float64` precision to prevent overflows and ensure
-correctness (e.g., :func:`metrics.pairwise.euclidean_distances`). However,
+correctness (e.g., :func:`metrics.pairwise.euclidean_distances`,
+:class:`preprocessing.StandardScaler`). However,
 certain combinations of array namespaces and devices, such as `PyTorch on MPS`
 (see :ref:`mps_support`) do not support the `float64` data type. In these cases,
 scikit-learn will revert to using the `float32` data type instead. This can result in
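In practice, enabling Array API dispatch lets the scaler operate directly on non-NumPy inputs. A minimal usage sketch (assuming PyTorch and the array-api-compat package are installed; the tensor values are illustrative):

import torch
from sklearn import config_context
from sklearn.preprocessing import StandardScaler

X = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

with config_context(array_api_dispatch=True):
    X_scaled = StandardScaler().fit_transform(X)

print(type(X_scaled))  # remains a torch.Tensor; no round-trip through NumPy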
Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+- :class:`sklearn.preprocessing.StandardScaler` now supports Array API compliant inputs.
+  :pr:`27113` by :user:`Alexander Fabisch <AlexanderFabisch>`, :user:`Edoardo Abati <EdAbati>`,
+  :user:`Olivier Grisel <ogrisel>` and :user:`Charles Hill <charlesjhill>`.

sklearn/preprocessing/_data.py

Lines changed: 27 additions & 18 deletions

@@ -20,10 +20,13 @@
 from sklearn.utils import _array_api, check_array, metadata_routing, resample
 from sklearn.utils._array_api import (
     _find_matching_floating_dtype,
+    _max_precision_float_dtype,
     _modify_in_place_if_numpy,
     device,
     get_namespace,
     get_namespace_and_device,
+    size,
+    supported_float_dtypes,
 )
 from sklearn.utils._param_validation import (
     Interval,
@@ -86,7 +89,9 @@ def _is_constant_feature(var, mean, n_samples):
     recommendations", by Chan, Golub, and LeVeque.
     """
     # In scikit-learn, variance is always computed using float64 accumulators.
-    eps = np.finfo(np.float64).eps
+    xp, _, device_ = get_namespace_and_device(var, mean)
+    max_float_dtype = _max_precision_float_dtype(xp=xp, device=device_)
+    eps = xp.finfo(max_float_dtype).eps

     upper_bound = n_samples * eps * var + (n_samples * mean * eps) ** 2
     return var <= upper_bound
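The point of routing `eps` through `_max_precision_float_dtype` is that on devices without `float64` (e.g. PyTorch on MPS) the accumulators are `float32`, so the constant-feature bound must use the coarser `float32` machine epsilon. A rough sketch of the idea behind the helper (a simplified stand-in, not the actual implementation):

def max_precision_float_dtype_sketch(xp, device=None):
    # Pick float64 when the namespace/device pair supports it,
    # otherwise fall back to float32.
    dtypes = xp.__array_namespace_info__().dtypes(
        device=device, kind="real floating"
    )
    return dtypes.get("float64", xp.float32)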
@@ -952,12 +957,13 @@ def partial_fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted scaler.
         """
+        xp, _, X_device = get_namespace_and_device(X)
         first_call = not hasattr(self, "n_samples_seen_")
         X = validate_data(
             self,
             X,
             accept_sparse=("csr", "csc"),
-            dtype=FLOAT_DTYPES,
+            dtype=supported_float_dtypes(xp, X_device),
             ensure_all_finite="allow-nan",
             reset=first_call,
         )
@@ -971,14 +977,14 @@ def partial_fit(self, X, y=None, sample_weight=None):
         # See incr_mean_variance_axis and _incremental_mean_variance_axis

         # if n_samples_seen_ is an integer (i.e. no missing values), we need to
-        # transform it to a NumPy array of shape (n_features,) required by
+        # transform it to an array of shape (n_features,) required by
         # incr_mean_variance_axis and _incremental_variance_axis
-        dtype = np.int64 if sample_weight is None else X.dtype
-        if not hasattr(self, "n_samples_seen_"):
-            self.n_samples_seen_ = np.zeros(n_features, dtype=dtype)
-        elif np.size(self.n_samples_seen_) == 1:
-            self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])
-        self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)
+        dtype = xp.int64 if sample_weight is None else X.dtype
+        if first_call:
+            self.n_samples_seen_ = xp.zeros(n_features, dtype=dtype, device=X_device)
+        elif size(self.n_samples_seen_) == 1:
+            self.n_samples_seen_ = xp.repeat(self.n_samples_seen_, X.shape[1])
+        self.n_samples_seen_ = xp.astype(self.n_samples_seen_, dtype, copy=False)

         if sparse.issparse(X):
             if self.with_mean:
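These replacements are the standard Array API translations: casts go through the namespace-level `xp.astype(array, dtype)` function rather than the NumPy-only `.astype` method, and array creation takes an explicit `device` argument so new arrays land on the same device as the input. A minimal illustration (assuming NumPy >= 2.0, which implements these Array API entry points):

import numpy as xp  # any Array API namespace behaves the same way

counts = xp.zeros(3, dtype=xp.float64, device="cpu")  # explicit device placement
counts = xp.astype(counts, xp.int64, copy=False)      # functional cast, no .astype method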
@@ -1036,7 +1042,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
             if not self.with_mean and not self.with_std:
                 self.mean_ = None
                 self.var_ = None
-                self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0)
+                self.n_samples_seen_ += X.shape[0] - xp.isnan(X).sum(axis=0)

         else:
             self.mean_, self.var_, self.n_samples_seen_ = _incremental_mean_and_var(

@@ -1050,7 +1056,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
         # for backward-compatibility, reduce n_samples_seen_ to an integer
         # if the number of samples is the same for each feature (i.e. no
         # missing values)
-        if np.ptp(self.n_samples_seen_) == 0:
+        if xp.max(self.n_samples_seen_) == xp.min(self.n_samples_seen_):
            self.n_samples_seen_ = self.n_samples_seen_[0]

         if self.with_std:
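`np.ptp` (peak-to-peak, i.e. max minus min) is not part of the Array API, so the "all per-feature counts are equal" test is spelled out with `xp.max`/`xp.min`. The two forms are equivalent here:

import numpy as np

seen = np.array([150, 150, 150])
assert np.ptp(seen) == 0             # NumPy-only spelling
assert np.max(seen) == np.min(seen)  # portable Array API spelling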
@@ -1060,7 +1066,7 @@ def partial_fit(self, X, y=None, sample_weight=None):
                 self.var_, self.mean_, self.n_samples_seen_
             )
             self.scale_ = _handle_zeros_in_scale(
-                np.sqrt(self.var_), copy=False, constant_mask=constant_mask
+                xp.sqrt(self.var_), copy=False, constant_mask=constant_mask
             )
         else:
             self.scale_ = None
@@ -1082,6 +1088,7 @@ def transform(self, X, copy=None):
         X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)
             Transformed array.
         """
+        xp, _, X_device = get_namespace_and_device(X)
         check_is_fitted(self)

         copy = copy if copy is not None else self.copy

@@ -1091,7 +1098,7 @@ def transform(self, X, copy=None):
             reset=False,
             accept_sparse="csr",
             copy=copy,
-            dtype=FLOAT_DTYPES,
+            dtype=supported_float_dtypes(xp, X_device),
             force_writeable=True,
             ensure_all_finite="allow-nan",
         )
@@ -1106,9 +1113,9 @@ def transform(self, X, copy=None):
                 inplace_column_scale(X, 1 / self.scale_)
         else:
             if self.with_mean:
-                X -= self.mean_
+                X -= xp.astype(self.mean_, X.dtype)
             if self.with_std:
-                X /= self.scale_
+                X /= xp.astype(self.scale_, X.dtype)
         return X

     def inverse_transform(self, X, copy=None):
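The explicit casts are needed because `mean_` and `scale_` may be stored at a higher precision than `X` (see the `float64` note in the docs diff above), and strict Array API namespaces reject in-place operations that would promote the left-hand dtype. A toy reproduction of the failure mode (assuming the array-api-strict package is installed):

import array_api_strict as xp

X = xp.asarray([[1.0, 2.0]], dtype=xp.float32)
mean = xp.asarray([0.5, 0.5], dtype=xp.float64)

# X -= mean  # would raise: an in-place op may not promote float32 to float64
X -= xp.astype(mean, X.dtype)  # cast first, then subtract in place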
@@ -1127,14 +1134,15 @@ def inverse_transform(self, X, copy=None):
         X_original : {ndarray, sparse matrix} of shape (n_samples, n_features)
             Transformed array.
         """
+        xp, _, X_device = get_namespace_and_device(X)
         check_is_fitted(self)

         copy = copy if copy is not None else self.copy
         X = check_array(
             X,
             accept_sparse="csr",
             copy=copy,
-            dtype=FLOAT_DTYPES,
+            dtype=supported_float_dtypes(xp, X_device),
             force_writeable=True,
             ensure_all_finite="allow-nan",
         )

@@ -1149,16 +1157,17 @@ def inverse_transform(self, X, copy=None):
             inplace_column_scale(X, self.scale_)
         else:
             if self.with_std:
-                X *= self.scale_
+                X *= xp.astype(self.scale_, X.dtype)
             if self.with_mean:
-                X += self.mean_
+                X += xp.astype(self.mean_, X.dtype)
         return X

     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
         tags.input_tags.sparse = not self.with_mean
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
+        tags.array_api_support = True
         return tags
sklearn/preprocessing/tests/test_data.py

Lines changed: 100 additions & 7 deletions

@@ -43,7 +43,6 @@
     _get_namespace_device_dtype_ids,
     yield_namespace_device_dtype_combinations,
 )
-from sklearn.utils._test_common.instance_generator import _get_check_estimator_ids
 from sklearn.utils._testing import (
     _array_api_for_tests,
     _convert_container,

@@ -56,6 +55,7 @@
     skip_if_32bit,
 )
 from sklearn.utils.estimator_checks import (
+    _get_check_estimator_ids,
     check_array_api_input_and_values,
 )
 from sklearn.utils.fixes import (
@@ -117,10 +117,13 @@ def test_raises_value_error_if_sample_weights_greater_than_1d():
         scaler.fit(X, y, sample_weight=sample_weight_notOK)


-@pytest.mark.parametrize(
-    ["Xw", "X", "sample_weight"],
-    [
-        ([[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [1, 2, 3], [4, 5, 6]], [2.0, 1.0]),
+def _yield_xw_x_sampleweight():
+    yield from (
+        (
+            [[1, 2, 3], [4, 5, 6]],
+            [[1, 2, 3], [1, 2, 3], [4, 5, 6]],
+            [2.0, 1.0],
+        ),
         (
             [[1, 0, 1], [0, 0, 1]],
             [[1, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1]],

@@ -136,8 +139,10 @@ def test_raises_value_error_if_sample_weights_greater_than_1d():
             ],
             np.array([1, 3]),
         ),
-    ],
-)
+    )
+
+
+@pytest.mark.parametrize(["Xw", "X", "sample_weight"], _yield_xw_x_sampleweight())
 @pytest.mark.parametrize("array_constructor", ["array", "sparse_csr", "sparse_csc"])
 def test_standard_scaler_sample_weight(Xw, X, sample_weight, array_constructor):
     with_mean = not array_constructor.startswith("sparse")
@@ -161,6 +166,68 @@ def test_standard_scaler_sample_weight(Xw, X, sample_weight, array_constructor):
     assert_almost_equal(scaler.transform(X_test), scaler_w.transform(X_test))


+@pytest.mark.parametrize(["Xw", "X", "sample_weight"], _yield_xw_x_sampleweight())
+@pytest.mark.parametrize(
+    "namespace, dev, dtype",
+    yield_namespace_device_dtype_combinations(),
+    ids=_get_namespace_device_dtype_ids,
+)
+def test_standard_scaler_sample_weight_array_api(
+    Xw, X, sample_weight, namespace, dev, dtype
+):
+    # N.B. The sample statistics for Xw w/ sample_weight should match
+    # the statistics of X w/ uniform sample_weight.
+    xp = _array_api_for_tests(namespace, dev)
+
+    X = np.array(X).astype(dtype, copy=False)
+    y = np.ones(X.shape[0]).astype(dtype, copy=False)
+    Xw = np.array(Xw).astype(dtype, copy=False)
+    yw = np.ones(Xw.shape[0]).astype(dtype, copy=False)
+    X_test = np.array([[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]).astype(dtype, copy=False)
+
+    scaler = StandardScaler()
+    scaler.fit(X, y)
+
+    scaler_w = StandardScaler()
+    scaler_w.fit(Xw, yw, sample_weight=sample_weight)
+
+    # Test array-api support and correctness.
+    X_xp = xp.asarray(X, device=dev)
+    y_xp = xp.asarray(y, device=dev)
+    Xw_xp = xp.asarray(Xw, device=dev)
+    yw_xp = xp.asarray(yw, device=dev)
+    X_test_xp = xp.asarray(X_test, device=dev)
+    sample_weight_xp = xp.asarray(sample_weight, device=dev)
+
+    scaler_w_xp = StandardScaler()
+    with config_context(array_api_dispatch=True):
+        scaler_w_xp.fit(Xw_xp, yw_xp, sample_weight=sample_weight_xp)
+        w_mean = _convert_to_numpy(scaler_w_xp.mean_, xp=xp)
+        w_var = _convert_to_numpy(scaler_w_xp.var_, xp=xp)
+
+    assert_allclose(scaler_w.mean_, w_mean)
+    assert_allclose(scaler_w.var_, w_var)
+
+    # unweighted, but with repeated samples
+    scaler_xp = StandardScaler()
+    with config_context(array_api_dispatch=True):
+        scaler_xp.fit(X_xp, y_xp)
+        uw_mean = _convert_to_numpy(scaler_xp.mean_, xp=xp)
+        uw_var = _convert_to_numpy(scaler_xp.var_, xp=xp)
+
+    assert_allclose(scaler.mean_, uw_mean)
+    assert_allclose(scaler.var_, uw_var)
+
+    # Check that both array-api outputs match.
+    assert_allclose(uw_mean, w_mean)
+    assert_allclose(uw_var, w_var)
+    with config_context(array_api_dispatch=True):
+        assert_allclose(
+            _convert_to_numpy(scaler_xp.transform(X_test_xp), xp=xp),
+            _convert_to_numpy(scaler_w_xp.transform(X_test_xp), xp=xp),
+        )
+
+
 def test_standard_scaler_1d():
     # Test scaling of dataset along single axis
     for X in [X_1row, X_1col, X_list_1row, X_list_1row]:
@@ -726,6 +793,32 @@ def test_preprocessing_array_api_compliance(
     check(name, estimator, array_namespace, device=device, dtype_name=dtype_name)


+@pytest.mark.parametrize(
+    "array_namespace, device, dtype_name",
+    yield_namespace_device_dtype_combinations(),
+    ids=_get_namespace_device_dtype_ids,
+)
+@pytest.mark.parametrize(
+    "check",
+    [check_array_api_input_and_values],
+    ids=_get_check_estimator_ids,
+)
+@pytest.mark.parametrize("sample_weight", [True, None])
+def test_standard_scaler_array_api_compliance(
+    check, sample_weight, array_namespace, device, dtype_name
+):
+    estimator = StandardScaler()
+    name = estimator.__class__.__name__
+    check(
+        name,
+        estimator,
+        array_namespace,
+        device=device,
+        dtype_name=dtype_name,
+        check_sample_weight=sample_weight,
+    )
+
+
 def test_min_max_scaler_iris():
     X = iris.data
     scaler = MinMaxScaler()

sklearn/utils/_array_api.py

Lines changed: 38 additions & 1 deletion

@@ -246,10 +246,34 @@ def _union1d(a, b, xp):
 def supported_float_dtypes(xp, device=None):
     """Supported floating point types for the namespace.

-    Note: float16 is not officially part of the Array API spec at the
+    Parameters
+    ----------
+    xp : module
+        Array namespace to inspect.
+
+    device : str or device instance from xp, default=None
+        Device to use for dtype selection. If ``None``, then a default device
+        is assumed.
+
+    Returns
+    -------
+    supported_dtypes : tuple
+        Tuple of real floating data types supported by the provided array namespace,
+        ordered from the highest precision to lowest.
+
+    See Also
+    --------
+    max_precision_float_dtype : Maximum float dtype for a namespace/device pair.
+
+    Notes
+    -----
+    `float16` is not officially part of the Array API spec at the
     time of writing but scikit-learn estimators and functions can choose
     to accept it when xp.float16 is defined.

+    Additionally, some devices available within a namespace may not support
+    all floating-point types that the namespace provides.
+
     https://data-apis.org/array-api/latest/API_specification/data_types.html
     """
     dtypes_dict = xp.__array_namespace_info__().dtypes(
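The practical effect is that the returned tuple shrinks on devices lacking `float64`. A quick way to see what a namespace reports (assuming NumPy >= 2.0, which ships `__array_namespace_info__`; on PyTorch/MPS the float64 entry would be absent):

import numpy as np

dtypes = np.__array_namespace_info__().dtypes(kind="real floating")
print(sorted(dtypes))  # ['float32', 'float64'] for NumPy on CPU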
@@ -748,6 +772,19 @@ def _nanmean(X, axis=None, xp=None):
     return total / count


+def _nansum(X, axis=None, xp=None, keepdims=False, dtype=None):
+    # TODO: refactor once nan-aware reductions are standardized:
+    # https://github.com/data-apis/array-api/issues/621
+    xp, _, X_device = get_namespace_and_device(X, xp=xp)
+
+    if _is_numpy_namespace(xp):
+        return xp.asarray(numpy.nansum(X, axis=axis, keepdims=keepdims, dtype=dtype))
+
+    mask = xp.isnan(X)
+    masked_arr = xp.where(mask, xp.asarray(0, device=X_device, dtype=X.dtype), X)
+    return xp.sum(masked_arr, axis=axis, keepdims=keepdims, dtype=dtype)
+
+
 def _asarray_with_order(
     array, dtype=None, order=None, copy=None, *, xp=None, device=None
 ):
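The portable branch of `_nansum` uses a mask-and-replace pattern that any Array API namespace supports: zero out the NaNs, then run an ordinary sum. For example (plain NumPy standing in as the namespace for illustration):

import numpy as xp

X = xp.asarray([[1.0, xp.nan], [3.0, 4.0]])
masked = xp.where(xp.isnan(X), xp.asarray(0.0, dtype=X.dtype), X)
print(xp.sum(masked, axis=0))  # [4. 4.], NaNs treated as zero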
