Skip to content

Use np.empty_like in PolynomialFeatures for NEP18 support #16196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from ..utils import check_array
from ..utils.extmath import row_norms
from ..utils.extmath import _incremental_mean_and_var
from ..utils.array_creation import empty_like
from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
inplace_csr_row_normalize_l2)
from ..utils.sparsefuncs import (inplace_column_scale,
Expand Down Expand Up @@ -1582,8 +1583,8 @@ def transform(self, X):
columns.append(bias)
XP = sparse.hstack(columns, dtype=X.dtype).tocsc()
else:
XP = np.empty((n_samples, self.n_output_features_),
dtype=X.dtype, order=self.order)
XP = empty_like(X, order=self.order,
shape=(n_samples, self.n_output_features_))

# What follows is a faster implementation of:
# for i, comb in enumerate(combinations):
Expand Down
6 changes: 4 additions & 2 deletions sklearn/preprocessing/_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from ..base import BaseEstimator, TransformerMixin
from ..utils import check_array
from ..utils.array_creation import zeros_like, ones_like
from ..utils.fixes import _argmax
from ..utils.validation import check_is_fitted

from ._label import _encode, _encode_check_unknown
Expand Down Expand Up @@ -100,8 +102,8 @@ def _fit(self, X, handle_unknown='error'):
def _transform(self, X, handle_unknown='error'):
X_list, n_samples, n_features = self._check_X(X)

X_int = np.zeros((n_samples, n_features), dtype=np.int)
X_mask = np.ones((n_samples, n_features), dtype=np.bool)
X_int = zeros_like(X, shape=(n_samples, n_features), dtype=np.int)
X_mask = ones_like(X, shape=(n_samples, n_features), dtype=np.bool)

if n_features != len(self.categories_):
raise ValueError(
Expand Down
49 changes: 49 additions & 0 deletions sklearn/utils/array_creation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Central place for array creation, to support non-numpy arrays.

This currently leverages NEP18 via np.{empty|zeros|ones}_like to create
non-numpy arrays.
"""

from .fixes import np_version

import numpy as np


def create_like(create, create_like):
    """Generalization of (empty|zeros|ones)_like.

    Parameters
    ----------
    create : callable
        Shape-based creation function (e.g. ``np.empty``). It is called as
        ``create(shape, dtype=..., order=...)`` and its ``__name__`` is used
        to build error messages and the documentation of the wrapper.
    create_like : callable
        Prototype-based counterpart (e.g. ``np.empty_like``).

    Returns
    -------
    callable
        A function with the signature
        ``(prototype, dtype=None, order='C', subok=True, shape=None)``.
    """
    name = create.__name__

    def metafunction(prototype, dtype=None, order='C', subok=True, shape=None):
        # NOTE: ``np_version`` is read from the module globals on every call,
        # so it can be overridden at runtime to exercise the fallback branch.
        if not np_version < (1, 17):
            # Recent numpy: ``*_like`` accepts ``shape`` directly.
            return create_like(prototype, dtype=dtype, order=order,
                               subok=subok, shape=shape)

        if shape is None:
            # Old numpy, no shape requested: the plain ``*_like`` call works.
            return create_like(prototype, dtype=dtype, order=order,
                               subok=subok)

        # Old numpy with an explicit shape: emulate ``*_like`` with the
        # shape-based creation function. ``subok`` is not forwarded here.
        if dtype is None:
            if not hasattr(prototype, 'dtype'):
                raise NotImplementedError('Passed prototype to {name}_'
                                          'like without a dtype'.
                                          format(name=name))
            dtype = prototype.dtype
        if order == 'A':
            order = 'F' if prototype.flags['F_CONTIGUOUS'] else 'C'
        elif order == 'K':
            raise NotImplementedError('order=K not implemented')
        return create(shape, dtype=dtype, order=order)

    # A ``"...".format(...)`` expression placed at the top of a function body
    # is not a string literal, so Python does not treat it as a docstring.
    # Attach the formatted text explicitly instead.
    metafunction.__doc__ = (
        """Forwards call to numpy.{name}_like or {name}, to be compatible with NEP18.

        Before numpy 1.17, numpy.{name}_like did not take a shape argument.

        When version of numpy < (1, 17), and shape is provided, the call will
        be forwarded to numpy.{name}. If shape is not provided, the call is
        forwarded to numpy.{name}_like.

        Parameters
        ----------
        prototype : array-like
            Object the new array is modelled on.
        dtype : data-type, default=None
            Overrides the dtype of the result. In the numpy < 1.17 fallback
            with ``shape`` given, ``prototype.dtype`` is used when None.
        order : {{'C', 'F', 'A', 'K'}}, default='C'
            Memory layout of the result. 'K' is not implemented in the
            numpy < 1.17 fallback with ``shape`` given.
        subok : bool, default=True
            Forwarded to numpy.{name}_like; not used when the call is
            forwarded to numpy.{name}.
        shape : tuple of int, default=None
            Overrides the shape of the result.

        Raises
        ------
        NotImplementedError
            In the numpy < 1.17 fallback with ``shape`` given, if ``dtype``
            is None and ``prototype`` has no ``dtype`` attribute, or if
            ``order='K'``.
        """.format(name=name))
    metafunction.__name__ = '{name}_like'.format(name=name)
    return metafunction


# Public wrappers: each one pairs a numpy shape-based creation function with
# its prototype-based ``*_like`` counterpart through ``create_like``.
empty_like = create_like(np.empty, np.empty_like)
zeros_like = create_like(np.zeros, np.zeros_like)
ones_like = create_like(np.ones, np.ones_like)
74 changes: 74 additions & 0 deletions sklearn/utils/tests/test_array_creation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import numpy as np
import pytest
from unittest.mock import MagicMock

from sklearn.utils.array_creation import empty_like
from sklearn.utils.array_creation import np_version
import sklearn.utils.array_creation


@pytest.mark.skipif(np_version < (1, 17),
                    reason="NEP18 not supported before 1.17")
def test_empty_like_nep18():
    """empty_like hands the call to the prototype's __array_function__."""

    class ArrayLike:
        __array_function__ = MagicMock(return_value=42)

    # With NEP18 support, the call is forwarded to the prototype's
    # __array_function__, so the mock's return value comes back unchanged.
    prototype = ArrayLike()
    result = empty_like(prototype, dtype=np.float32, shape=(4, 2))
    assert result == 42


def test_empty_like():
    """Plain ndarrays are handled regardless of the numpy version."""
    # Normal arrays should just work with all versions of numpy
    prototype = np.arange(8).reshape((4, 2))
    result = empty_like(prototype)
    assert isinstance(result, np.ndarray)
    assert result.shape == (4, 2)


def test_empty_like_no_nep18():
    """Cover the numpy < 1.17 fallback branch on any installed numpy."""

    class NotAnArray:
        def __array__(self):
            return np.arange(8, dtype=np.float64).reshape((4, 2))

    # Capture the real version *before* entering the try block, so the
    # finally clause can always restore it.
    real_np_version = sklearn.utils.array_creation.np_version
    try:
        # we trick this module into thinking it is working with an older
        # version to also test/cover this branch with newer versions of numpy
        sklearn.utils.array_creation.np_version = (1, 16)

        # with an explicit dtype, a non-ndarray prototype works with shape
        no_array = NotAnArray()
        created = empty_like(no_array, dtype=np.float32, shape=(4, 2))
        assert created.shape == (4, 2)
        assert created.dtype == np.float32

        # for numpy < 1.17, we should give an error msg, if we provide shape
        # with a non-numpy array, and no dtype
        with pytest.raises(NotImplementedError):
            empty_like(no_array, shape=(4, 2))

        # we can pass a non-ndarray object, but without shape
        no_array = NotAnArray()
        an_array = empty_like(no_array, dtype=np.float32)
        assert an_array.shape == (4, 2)
        assert an_array.dtype == np.float32

        # but with a ndarray, we can pass with shape
        second_array = empty_like(an_array, dtype=np.float64, shape=(3, 5))
        assert second_array.shape == (3, 5)
        assert second_array.dtype == np.float64

        # and the dtype is optional for ndarrays
        second_array_same_type = empty_like(an_array, shape=(3, 5))
        assert second_array_same_type.shape == (3, 5)
        assert second_array_same_type.dtype == np.float32

        # the default order is 'C', whatever the layout of the prototype
        c_like_array = empty_like(an_array.T, shape=(3, 5))
        assert c_like_array.flags['C_CONTIGUOUS']

        # order='A' follows the (Fortran) layout of the transposed prototype
        fortran_like_array = empty_like(an_array.T, order='A', shape=(3, 5))
        assert fortran_like_array.flags['F_CONTIGUOUS']

        # unlike numpy, we don't implement order=K
        with pytest.raises(NotImplementedError):
            empty_like(an_array, order='K', shape=(4, 2))

    finally:
        sklearn.utils.array_creation.np_version = real_np_version