Skip to content

[MRG-1] ENH: Allow float32 to pass through with copy #5932

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions sklearn/utils/sparsefuncs_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,9 @@ def inplace_csr_row_normalize_l1(X):
cdef unsigned int n_samples = X.shape[0]
cdef unsigned int n_features = X.shape[1]

cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data
# might copy
cdef np.ndarray[DOUBLE, ndim=1] X_data = np.asarray(X.data,
dtype=np.float64)
cdef np.ndarray[int, ndim=1] X_indices = X.indices
cdef np.ndarray[int, ndim=1] X_indptr = X.indptr

Expand Down Expand Up @@ -313,7 +315,9 @@ def inplace_csr_row_normalize_l2(X):
cdef unsigned int n_samples = X.shape[0]
cdef unsigned int n_features = X.shape[1]

cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data
# might copy
cdef np.ndarray[DOUBLE, ndim=1] X_data = np.asarray(X.data,
dtype=np.float64)
cdef np.ndarray[int, ndim=1] X_indices = X.indices
cdef np.ndarray[int, ndim=1] X_indptr = X.indptr

Expand Down Expand Up @@ -364,7 +368,7 @@ def assign_rows_csr(X,
"""Densify selected rows of a CSR matrix into a preallocated array.

Like out[out_rows] = X[X_rows].toarray() but without copying.
Only supported for dtype=np.float64.
No-copy only supported for dtype=np.float64.

Parameters
----------
Expand All @@ -378,7 +382,9 @@ def assign_rows_csr(X,
# but int is what scipy.sparse uses.
int i, ind, j
np.npy_intp rX
np.ndarray[DOUBLE, ndim=1] data = X.data
# might copy
np.ndarray[DOUBLE, ndim=1] data = np.asarray(X.data,
dtype=np.float64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change to assign_rows_csr is not tested by the new regression test.

np.ndarray[int, ndim=1] indices = X.indices, indptr = X.indptr

if X_rows.shape[0] != out_rows.shape[0]:
Expand Down
11 changes: 10 additions & 1 deletion sklearn/utils/tests/test_sparsefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
inplace_swap_row, inplace_swap_column,
min_max_axis,
count_nonzero, csc_median_axis_0)
from sklearn.utils.sparsefuncs_fast import assign_rows_csr
from sklearn.utils.sparsefuncs_fast import (assign_rows_csr,
inplace_csr_row_normalize_l1,
inplace_csr_row_normalize_l2)
from sklearn.utils.testing import assert_raises


Expand Down Expand Up @@ -478,3 +480,10 @@ def test_csc_row_median():

# Test that it raises an Error for non-csc matrices.
assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X))


def test_inplace_normalize():
# regression tests for passing 32-bit floating point
X = sp.rand(10, 5, dtype=np.float32, density=.5).tocsr()
inplace_csr_row_normalize_l1(X)
inplace_csr_row_normalize_l2(X)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not check that X has been properly normalized, and in practice it cannot be the case as the data is array which is actually normalized is a copy of the input X.data when the dtype is not np.float64.