Skip to content

MAINT: convert numpy.array_api to array-api-strict #28555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Generated by conda-lock.
# platform: linux-64
# input_hash: 0ef2318a417ecd9806c39a466da49a53e8dab8b199cceb5bffcdd59c0a293907
# input_hash: cdc27128862fe4a9c586b6961e094936bdb919a4b142282ae0370d7e4cb2c9a4
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef
Expand Down Expand Up @@ -200,6 +200,7 @@ https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.t
https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.10.57-h85b1a90_19.conda#0605d3d60857fc07bd6a11e878fe0f08
https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py311h64a7726_0.conda#a502d7aad449a1206efb366d6a12c52d
https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h5810be5_19.conda#54866f708d43002a514d0b9b0f84bc11
https://conda.anaconda.org/conda-forge/noarch/array-api-strict-1.1-pyhd8ed1ab_0.conda#db4260fac4412db30bd5213b6c9f6ecc
https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.0-py311h9547e67_0.conda#40828c5b36ef52433e21f89943e09f33
https://conda.anaconda.org/conda-forge/linux-64/libarrow-12.0.1-hb87d912_8_cpu.conda#3f3b11398fe79b578e3c44dd00a44e4a
https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.1-py311h320fe9a_0.conda#aac8d7137fedc2fd5f8320bf50e4204c
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ dependencies:
- polars
- pyarrow
- array-api-compat
- array-api-strict
1 change: 1 addition & 0 deletions build_tools/update_environments_and_lock_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def remove_from(alist, to_remove):
"polars",
"pyarrow",
"array-api-compat",
"array-api-strict",
],
"package_constraints": {
"blas": "[build=mkl]",
Expand Down
2 changes: 1 addition & 1 deletion doc/modules/array_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ At this stage, this support is **considered experimental** and must be enabled
explicitly as explained in the following.

.. note::
Currently, only `cupy.array_api`, `numpy.array_api`, `cupy`, and `PyTorch`
Currently, only `cupy.array_api`, `array-api-strict`, `cupy`, and `PyTorch`
are known to work with scikit-learn's estimators.

Example usage
Expand Down
2 changes: 1 addition & 1 deletion sklearn/decomposition/tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def test_pca_mle_array_api_compliance(

def test_array_api_error_and_warnings_on_unsupported_params():
pytest.importorskip("array_api_compat")
xp = pytest.importorskip("numpy.array_api")
xp = pytest.importorskip("array_api_strict")
iris_xp = xp.asarray(iris.data)

pca = PCA(n_components=2, svd_solver="arpack", random_state=0)
Expand Down
4 changes: 2 additions & 2 deletions sklearn/linear_model/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
_fit_context,
)
from ..utils import check_array, check_random_state
from ..utils._array_api import get_namespace
from ..utils._array_api import get_namespace, indexing_dtype
from ..utils._seq_dataset import (
ArrayDataset32,
ArrayDataset64,
Expand Down Expand Up @@ -350,7 +350,7 @@ def predict(self, X):
xp, _ = get_namespace(X)
scores = self.decision_function(X)
if len(scores.shape) == 1:
indices = xp.astype(scores > 0, int)
indices = xp.astype(scores > 0, indexing_dtype(xp))
else:
indices = xp.argmax(scores, axis=1)

Expand Down
67 changes: 46 additions & 21 deletions sklearn/utils/_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .._config import get_config
from .fixes import parse_version

_NUMPY_NAMESPACE_NAMES = {"numpy", "array_api_compat.numpy", "numpy.array_api"}
_NUMPY_NAMESPACE_NAMES = {"numpy", "array_api_compat.numpy"}
Copy link
Member

@betatim betatim Mar 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I support this change, but not including array_api_strict here is a change from how we have been treating numpy.array_api. It might make some uses of is_numpy_namespace() obsolete. (A good thing IMHO)

Related: do we need to keep numpy.array_api here for people who have an older numpy version and keep using it and expect that support for something experimental continues to exist in scikit-learn? I think we shouldn't keep it in and just assume that there are ~0 people in the world who use numpy.array_api.

Copy link
Member

@ogrisel ogrisel Mar 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might make some uses of is_numpy_namespace() obsolete.

Indeed for some cases (that we should simplify progressively) but it's still useful from time to time to detect cases where:

  • sklearn.get_config(array_api_dispatch=True) and the input is a (wrapped) numpy array.

vs

  • sklearn.get_config(array_api_dispatch=True) and the input is not a (wrapped) numpy array.

This is important to know when we can safely convert back and forth to numpy without overhead, for instance to use the out= kwarg for memory efficiency reasons.

do we need to keep numpy.array_api here for people who have an older numpy version and keep using it and expect that support for something experimental continues to exist in scikit-learn?

I don't see any value in this. numpy.array_api was a temporary experiment and it's going away. There is no point in using it in the future: it just adds complexity for no benefit.

numpy.array_api's value was mostly for testing array api compliance, and this is now better served by array-api-strict and it works even with older numpy versions as demonstrated by our CI.



def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True):
Expand Down Expand Up @@ -42,8 +42,8 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True):
# tests are regular numpy arrays without any "device" attribute.
"numpy",
# Stricter NumPy-based Array API implementation. The
# numpy.array_api.Array instances always a dummy "device" attribute.
"numpy.array_api",
# array_api_strict.Array instances always have a dummy "device" attribute.
"array_api_strict",
"cupy",
"cupy.array_api",
"torch",
Expand Down Expand Up @@ -194,7 +194,6 @@ def _isdtype_single(dtype, kind, *, xp):
return dtype in supported_float_dtypes(xp)
elif kind == "complex floating":
# Some name spaces do not have complex, such as cupy.array_api
# and numpy.array_api
complex_dtypes = set()
if hasattr(xp, "complex64"):
complex_dtypes.add(xp.complex64)
Expand Down Expand Up @@ -300,14 +299,20 @@ def wrapped_func(*args, **kwargs):
class _NumPyAPIWrapper:
"""Array API compat wrapper for any numpy version

NumPy < 1.22 does not expose the numpy.array_api namespace. This
wrapper makes it possible to write code that uses the standard
Array API while working with any version of NumPy supported by
scikit-learn.
NumPy < 2 does not implement the namespace. NumPy 2 and later should
progressively implement more and more of the latest Array API spec but this
is still work in progress at this time.

This wrapper makes it possible to write code that uses the standard Array
API while working with any version of NumPy supported by scikit-learn.

See the `get_namespace()` public function for more details.
"""

# TODO: once scikit-learn drops support for NumPy < 2, this class can be
# removed, assuming Array API compliance of NumPy 2 is actually sufficient
# for scikit-learn's needs.

# Creation functions in spec:
# https://data-apis.org/array-api/latest/API_specification/creation_functions.html
_CREATION_FUNCS = {
Expand Down Expand Up @@ -447,18 +452,15 @@ def _remove_non_arrays(*arrays, remove_none=True, remove_types=(str,)):
def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):
"""Get namespace of arrays.

Introspect `arrays` arguments and return their common Array API
compatible namespace object, if any. NumPy 1.22 and later can
construct such containers using the `numpy.array_api` namespace
for instance.
Introspect `arrays` arguments and return their common Array API compatible
namespace object, if any.

See: https://numpy.org/neps/nep-0047-array-api-standard.html

If `arrays` are regular numpy arrays, an instance of the
`_NumPyAPIWrapper` compatibility wrapper is returned instead.
If `arrays` are regular numpy arrays, an instance of the `_NumPyAPIWrapper`
compatibility wrapper is returned instead.

Namespace support is not enabled by default. To enabled it
call:
Namespace support is not enabled by default. To enable it, call:

sklearn.set_config(array_api_dispatch=True)

Expand All @@ -467,10 +469,9 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):
with sklearn.config_context(array_api_dispatch=True):
# your code here

Otherwise an instance of the `_NumPyAPIWrapper`
compatibility wrapper is always returned irrespective of
the fact that arrays implement the `__array_namespace__`
protocol or not.
Otherwise an instance of the `_NumPyAPIWrapper` compatibility wrapper is
always returned irrespective of the fact that arrays implement the
`__array_namespace__` protocol or not.

Parameters
----------
Expand Down Expand Up @@ -524,7 +525,7 @@ def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):

# These namespaces need additional wrapping to smooth out small differences
# between implementations
if namespace.__name__ in {"numpy.array_api", "cupy.array_api"}:
if namespace.__name__ in {"cupy.array_api"}:
namespace = _ArrayAPIWrapper(namespace)

return namespace, is_array_api_compliant
Expand Down Expand Up @@ -763,3 +764,27 @@ def _estimator_with_converted_arrays(estimator, converter):
def _atol_for_type(dtype):
"""Return the absolute tolerance for a given numpy dtype."""
return numpy.finfo(dtype).eps * 100


def indexing_dtype(xp):
    """Return a platform-specific integer dtype suitable for indexing.

    The returned dtype is the one that `xp` assigns to an array built from
    the Python integer ``0``. On 32-bit platforms this will typically be
    int32, and int64 otherwise.

    Note: using this dtype is recommended for indexing transient array data
    structures. For long-lived arrays, such as the fitted attributes of
    estimators, it is instead recommended to use platform-independent int32
    if we do not expect to index more than 2B elements. Using fixed dtypes
    simplifies the handling of serialized models, e.g. to deploy a model fit
    on a 64-bit platform to a target 32-bit platform such as WASM/pyodide.

    Parameters
    ----------
    xp : namespace
        Array namespace; only its ``asarray`` function is used here.

    Returns
    -------
    dtype
        The ``dtype`` attribute of ``xp.asarray(0)``.
    """
    # Currently this is implemented with a simple hack that assumes that the
    # following "may be" statements in the Array API spec always hold:
    # > The default integer data type should be the same across platforms, but
    # > the default may vary depending on whether Python is 32-bit or 64-bit.
    # > The default array index data type may be int32 on 32-bit platforms, but
    # > the default should be int64 otherwise.
    # https://data-apis.org/array-api/latest/API_specification/data_types.html#default-data-types
    # TODO: once sufficiently adopted, we might want to instead rely on the
    # newer inspection API: https://github.com/data-apis/array-api/issues/640
    zero = xp.asarray(0)
    return zero.dtype
8 changes: 1 addition & 7 deletions sklearn/utils/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1054,13 +1054,7 @@ def fit_transform(self, X, y=None):

def _array_api_for_tests(array_namespace, device):
try:
if array_namespace == "numpy.array_api":
# FIXME: once it is not experimental anymore
with ignore_warnings(category=UserWarning):
# UserWarning: numpy.array_api submodule is still experimental.
array_mod = importlib.import_module(array_namespace)
else:
array_mod = importlib.import_module(array_namespace)
array_mod = importlib.import_module(array_namespace)
except ModuleNotFoundError:
raise SkipTest(
f"{array_namespace} is not installed: not checking array_api input"
Expand Down
39 changes: 24 additions & 15 deletions sklearn/utils/tests/test_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from sklearn._config import config_context
from sklearn.base import BaseEstimator
from sklearn.utils import _IS_32BIT
from sklearn.utils._array_api import (
_ArrayAPIWrapper,
_asarray_with_order,
Expand All @@ -19,6 +20,7 @@
_NumPyAPIWrapper,
device,
get_namespace,
indexing_dtype,
supported_float_dtypes,
yield_namespace_device_dtype_combinations,
)
Expand All @@ -27,10 +29,6 @@
skip_if_array_api_compat_not_configured,
)

pytestmark = pytest.mark.filterwarnings(
"ignore:The numpy.array_api submodule:UserWarning"
)


@pytest.mark.parametrize("X", [numpy.asarray([1, 2, 3]), [1, 2, 3]])
def test_get_namespace_ndarray_default(X):
Expand Down Expand Up @@ -68,14 +66,13 @@ def test_get_namespace_ndarray_with_dispatch():
@skip_if_array_api_compat_not_configured
def test_get_namespace_array_api():
"""Test get_namespace for ArrayAPI arrays."""
xp = pytest.importorskip("numpy.array_api")
xp = pytest.importorskip("array_api_strict")

X_np = numpy.asarray([[1, 2, 3]])
X_xp = xp.asarray(X_np)
with config_context(array_api_dispatch=True):
xp_out, is_array_api_compliant = get_namespace(X_xp)
assert is_array_api_compliant
assert isinstance(xp_out, _ArrayAPIWrapper)

with pytest.raises(TypeError):
xp_out, is_array_api_compliant = get_namespace(X_xp, X_np)
Expand All @@ -91,8 +88,8 @@ def __init__(self, array_namespace, name):

def test_array_api_wrapper_astype():
"""Test _ArrayAPIWrapper for ArrayAPIs that is not NumPy."""
numpy_array_api = pytest.importorskip("numpy.array_api")
xp_ = _AdjustableNameAPITestWrapper(numpy_array_api, "wrapped_numpy.array_api")
array_api_strict = pytest.importorskip("array_api_strict")
xp_ = _AdjustableNameAPITestWrapper(array_api_strict, "array_api_strict")
xp = _ArrayAPIWrapper(xp_)

X = xp.asarray(([[1, 2, 3], [3, 4, 5]]), dtype=xp.float64)
Expand All @@ -103,7 +100,7 @@ def test_array_api_wrapper_astype():
assert X_converted.dtype == xp.float32


@pytest.mark.parametrize("array_api", ["numpy", "numpy.array_api"])
@pytest.mark.parametrize("array_api", ["numpy", "array_api_strict"])
def test_asarray_with_order(array_api):
"""Test _asarray_with_order passes along order for NumPy arrays."""
xp = pytest.importorskip(array_api)
Expand All @@ -117,8 +114,8 @@ def test_asarray_with_order(array_api):

def test_asarray_with_order_ignored():
"""Test _asarray_with_order ignores order for Generic ArrayAPI."""
xp = pytest.importorskip("numpy.array_api")
xp_ = _AdjustableNameAPITestWrapper(xp, "wrapped.array_api")
xp = pytest.importorskip("array_api_strict")
xp_ = _AdjustableNameAPITestWrapper(xp, "array_api_strict")

X = numpy.asarray([[1.2, 3.4, 5.1], [3.4, 5.5, 1.2]], order="C")
X = xp_.asarray(X)
Expand Down Expand Up @@ -308,7 +305,7 @@ def __init__(self, device_name):
# the following upstream issue has been fixed:
# https://github.com/cupy/cupy/issues/8180
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["numpy", "numpy.array_api", "torch"])
@pytest.mark.parametrize("library", ["numpy", "array_api_strict", "torch"])
@pytest.mark.parametrize(
"X,reduction,expected",
[
Expand Down Expand Up @@ -391,7 +388,7 @@ def fit(self, X, y=None):
"array_namespace, converter",
[
("torch", lambda array: array.cpu().numpy()),
("numpy.array_api", lambda array: numpy.asarray(array)),
("array_api_strict", lambda array: numpy.asarray(array)),
("cupy.array_api", lambda array: array._array.get()),
],
)
Expand All @@ -409,7 +406,7 @@ def test_convert_estimator_to_ndarray(array_namespace, converter):
@skip_if_array_api_compat_not_configured
def test_convert_estimator_to_array_api():
"""Convert estimator attributes to ArrayAPI arrays."""
xp = pytest.importorskip("numpy.array_api")
xp = pytest.importorskip("array_api_strict")

X_np = numpy.asarray([[1.3, 4.5]])
est = SimpleEstimator().fit(X_np)
Expand Down Expand Up @@ -438,7 +435,7 @@ def test_get_namespace_array_api_isdtype(wrapper):
"""Test isdtype implementation from _ArrayAPIWrapper and _NumPyAPIWrapper."""

if wrapper == _ArrayAPIWrapper:
xp_ = pytest.importorskip("numpy.array_api")
xp_ = pytest.importorskip("array_api_strict")
xp = _ArrayAPIWrapper(xp_)
else:
xp = _NumPyAPIWrapper()
Expand Down Expand Up @@ -473,3 +470,15 @@ def test_get_namespace_array_api_isdtype(wrapper):

with pytest.raises(ValueError, match="Unrecognized data type"):
assert xp.isdtype(xp.int16, "unknown")


@pytest.mark.parametrize(
    "namespace, _device, _dtype", yield_namespace_device_dtype_combinations()
)
def test_indexing_dtype(namespace, _device, _dtype):
    """Check that indexing_dtype returns int32 on 32-bit builds, else int64."""
    xp = _array_api_for_tests(namespace, _device)

    # Only the attribute for the current platform is looked up on `xp`.
    expected_dtype = xp.int32 if _IS_32BIT else xp.int64
    assert indexing_dtype(xp) == expected_dtype
6 changes: 3 additions & 3 deletions sklearn/utils/tests/test_estimator_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,15 +529,15 @@ def test_check_array_api_input():
except ModuleNotFoundError:
raise SkipTest("array_api_compat is required to run this test")
try:
importlib.import_module("numpy.array_api")
importlib.import_module("array_api_strict")
except ModuleNotFoundError: # pragma: nocover
raise SkipTest("numpy.array_api is required to run this test")
raise SkipTest("array-api-strict is required to run this test")

with raises(AssertionError, match="Not equal to tolerance"):
check_array_api_input(
"BrokenArrayAPI",
BrokenArrayAPI(),
array_namespace="numpy.array_api",
array_namespace="array_api_strict",
check_values=True,
)

Expand Down
2 changes: 1 addition & 1 deletion sklearn/utils/tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1972,7 +1972,7 @@ def test_pandas_array_returns_ndarray(input_values):


@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("array_namespace", ["numpy.array_api", "cupy.array_api"])
@pytest.mark.parametrize("array_namespace", ["array_api_strict", "cupy.array_api"])
def test_check_array_array_api_has_non_finite(array_namespace):
"""Checks that Array API arrays checks non-finite correctly."""
xp = pytest.importorskip(array_namespace)
Expand Down