-
-
Notifications
You must be signed in to change notification settings - Fork 25.8k
[WIP] NEP-18 support for preprocessing algorithms #17744
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
87a5386
d514b9d
aae6e33
005429f
f3e995f
f7bf531
3e4dea8
cb702d7
73834d2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2506,3 +2506,65 @@ def test_standard_scaler_sparse_partial_fit_finite_variance(X_2): | |
scaler = StandardScaler(with_mean=False) | ||
scaler.fit(X_1).partial_fit(X_2) | ||
assert np.isfinite(scaler.var_[0]) | ||
|
||
|
||
def test_minmax_scaler_cupy():
    """MinMaxScaler keeps cupy inputs as cupy and agrees with the numpy run.

    Skipped when cupy cannot be imported.
    """
    cp = pytest.importorskip("cupy")
    host_X = iris.data
    device_X = cp.asarray(host_X)

    # Fit and transform on the device array; the container type must be
    # preserved and the scaled values must span exactly [0, 1].
    device_scaler = MinMaxScaler(copy=True)
    device_scaled = device_scaler.fit_transform(device_X)
    assert type(device_scaled) == type(device_X)
    assert device_scaled.min() == 0
    assert device_scaled.max() == 1

    # The round trip through inverse_transform stays on the device and
    # recovers the original data.
    device_restored = device_scaler.inverse_transform(device_scaled)
    assert type(device_restored) == type(device_scaled)
    assert_almost_equal(cp.asnumpy(device_restored), host_X, decimal=3)

    # Reference result computed with a fresh scaler on the numpy input.
    host_scaled = MinMaxScaler(copy=True).fit_transform(host_X)

    # NOTE(review): a reviewer asked whether the explicit cp.asnumpy
    # conversion is really needed; the author replied that the comparison
    # does not seem to work without it.
    assert_almost_equal(cp.asnumpy(device_scaled), host_scaled, decimal=3)
|
||
|
||
def test_minmax_scale_cupy():
    """minmax_scale keeps cupy inputs as cupy and agrees with the numpy run.

    Skipped when cupy cannot be imported.
    """
    cp = pytest.importorskip("cupy")
    host_X = iris.data
    device_X = cp.asarray(host_X)

    device_scaled = minmax_scale(device_X)
    assert type(device_scaled) == type(device_X)

    # Compare on the host against the plain numpy result.
    host_scaled = minmax_scale(host_X)
    assert_almost_equal(cp.asnumpy(device_scaled), host_scaled, decimal=3)
|
||
|
||
@pytest.mark.parametrize("with_mean", [True, False])
@pytest.mark.parametrize("with_std", [True, False])
def test_standard_scaler_cupy(with_mean, with_std):
    """StandardScaler keeps cupy inputs as cupy and agrees with numpy.

    Runs for every combination of ``with_mean`` and ``with_std``; skipped
    when cupy cannot be imported.
    """
    cp = pytest.importorskip("cupy")
    host_X = iris.data
    device_X = cp.asarray(host_X)
    scaler_kwargs = dict(copy=True, with_mean=with_mean, with_std=with_std)

    # Fit and transform on the device array; the container type must be
    # preserved.
    device_scaler = StandardScaler(**scaler_kwargs)
    device_scaled = device_scaler.fit_transform(device_X)
    assert type(device_scaled) == type(device_X)

    # The round trip through inverse_transform stays on the device and
    # recovers the original data.
    device_restored = device_scaler.inverse_transform(device_scaled)
    assert type(device_restored) == type(device_scaled)
    assert_almost_equal(cp.asnumpy(device_restored), host_X, decimal=3)

    # Reference result computed with a fresh scaler on the numpy input.
    host_scaled = StandardScaler(**scaler_kwargs).fit_transform(host_X)
    assert_almost_equal(cp.asnumpy(device_scaled), host_scaled, decimal=3)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Central place for array creation, to support non-numpy arrays. | ||
|
||
This currently leverages NEP18 via np.{empty|zeros|ones}_like to create | ||
non-numpy arrays. | ||
""" | ||
|
||
from .fixes import np_version | ||
|
||
import numpy as np | ||
|
||
|
||
def create_like(create, create_like):
    """Generalization of (empty|zeros|ones)_like.

    Parameters
    ----------
    create : callable
        Shape-based constructor (e.g. ``np.empty``), called as
        ``create(shape, dtype=..., order=...)``. Its ``__name__`` is used to
        name and document the returned wrapper.
    create_like : callable
        Prototype-based constructor (e.g. ``np.empty_like``).

    Returns
    -------
    metafunction : callable
        Wrapper with signature
        ``(prototype, dtype=None, order='C', subok=True, shape=None)``.
    """
    name = create.__name__

    def metafunction(prototype, dtype=None, order='C', subok=True, shape=None):
        # ``np_version`` is looked up at call time on purpose, so that it can
        # be overridden at module level (the tests rely on this).
        if np_version >= (1, 17):
            # Forward every argument, including ``subok``, so the call
            # behaves like the underlying numpy function.
            return create_like(prototype, dtype=dtype, order=order,
                               subok=subok, shape=shape)

        if shape is None:
            return create_like(prototype, dtype=dtype, order=order,
                               subok=subok)

        # Old numpy with an explicit shape: {name}_like cannot take ``shape``,
        # so emulate it with the shape-based constructor. ``subok`` has no
        # equivalent on this path and is ignored.
        if dtype is None:
            if not hasattr(prototype, 'dtype'):
                raise NotImplementedError('Passed prototype to {name}_'
                                          'like without a dtype'.
                                          format(name=name))
            dtype = prototype.dtype
        if order == 'A':
            order = 'F' if prototype.flags['F_CONTIGUOUS'] else 'C'
        elif order == 'K':
            raise NotImplementedError('order=K not implemented')
        return create(shape, dtype=dtype, order=order)

    # A formatted string is not picked up as a docstring when written as the
    # first statement of the function, so attach it explicitly.
    metafunction.__name__ = '{name}_like'.format(name=name)
    metafunction.__qualname__ = metafunction.__name__
    metafunction.__doc__ = (
        """Forwards call to numpy.{name}_like or {name}, to be compatible
        with NEP18.

        Before numpy 1.17, numpy.{name}_like did not take a shape argument.

        When version of numpy < (1, 17), and shape is provided, the call will
        be forwarded to numpy.{name}. If shape is not provided, the call is
        forwarded to numpy.{name}_like.
        """.format(name=name))
    return metafunction
|
||
|
||
# Public wrappers: each pairs a shape-based numpy constructor with its
# prototype-based ``*_like`` counterpart.
empty_like = create_like(create=np.empty, create_like=np.empty_like)
zeros_like = create_like(create=np.zeros, create_like=np.zeros_like)
ones_like = create_like(create=np.ones, create_like=np.ones_like)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now that NEP-35 has been accepted, this could be simplified (on the dev version of numpy): There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We would still need a similar hack for backward compatibility (e.g. in sklearn/utils/fixes.py) to make it work for older versions of numpy, but we should definitely use the actual NEP-35 implementation on numpy versions that support it. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import numpy as np | ||
import pytest | ||
from unittest.mock import MagicMock | ||
|
||
from sklearn.utils.array_creation import empty_like | ||
from sklearn.utils.array_creation import np_version | ||
import sklearn.utils.array_creation | ||
|
||
|
||
@pytest.mark.skipif(np_version < (1, 17),
                    reason="NEP18 not supported before 1.17")
def test_empty_like_nep18():
    """empty_like returns whatever the prototype's __array_function__ gives."""
    sentinel = 42

    class ArrayLike:
        __array_function__ = MagicMock(return_value=sentinel)

    # if NEP18 is supported, empty_like should be forwarded to us
    result = empty_like(ArrayLike(), dtype=np.float32, shape=(4, 2))
    assert result == sentinel
|
||
|
||
def test_empty_like():
    """empty_like on a plain ndarray yields an ndarray of the same shape."""
    # Normal arrays should just work with all versions of numpy
    prototype = np.arange(8).reshape((4, 2))
    result = empty_like(prototype)
    assert isinstance(result, np.ndarray)
    assert result.shape == (4, 2)
|
||
|
||
def test_empty_like_no_nep18():
    """Exercise the numpy < 1.17 fallback branch of empty_like.

    The module-level ``np_version`` of ``sklearn.utils.array_creation`` is
    temporarily replaced by ``(1, 16)`` so the branch is covered with newer
    versions of numpy too.
    """
    from unittest.mock import patch

    class NotAnArray:
        def __array__(self):
            return np.arange(8, dtype=np.float64).reshape((4, 2))

    # we trick this module into thinking it is working with an older
    # version to also test/cover this branch with newer versions of numpy;
    # patch.object restores the real value on exit, even on failure
    with patch.object(sklearn.utils.array_creation, 'np_version', (1, 16)):
        no_array = NotAnArray()
        # with an explicit dtype and shape, the prototype is not inspected
        from_shape = empty_like(no_array, dtype=np.float32, shape=(4, 2))
        assert from_shape.shape == (4, 2)
        assert from_shape.dtype == np.float32

        # for numpy < 1.17, we should give an error msg, if we provide shape
        # with a non-numpy array, and no dtype
        with pytest.raises(NotImplementedError):
            empty_like(no_array, shape=(4, 2))

        # we can pass a non-ndarray object, but without shape
        no_array = NotAnArray()
        an_array = empty_like(no_array, dtype=np.float32)
        assert an_array.shape == (4, 2)
        assert an_array.dtype == np.float32

        # but with a ndarray, we can pass with shape
        second_array = empty_like(an_array, dtype=np.float64, shape=(3, 5))
        assert second_array.shape == (3, 5)
        assert second_array.dtype == np.float64

        # and the dtype is optional for ndarrays
        second_array_same_type = empty_like(an_array, shape=(3, 5))
        assert second_array_same_type.shape == (3, 5)
        assert second_array_same_type.dtype == np.float32

        # the default order is 'C', whatever the layout of the prototype
        c_like_array = empty_like(an_array.T, shape=(3, 5))
        assert c_like_array.flags['C_CONTIGUOUS']

        # order='A' follows the Fortran layout of the transposed prototype
        fortran_like_array = empty_like(an_array.T, order='A', shape=(3, 5))
        assert fortran_like_array.flags['F_CONTIGUOUS']

        # unlike numpy, we don't implement order=K
        with pytest.raises(NotImplementedError):
            empty_like(an_array, order='K', shape=(4, 2))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe also check that the min and max values of
t_X_cp
are 0 and 1. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done