diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index fdf59957f0afe..aea2730b2864f 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -278,7 +278,9 @@ def inplace_csr_row_normalize_l1(X): cdef unsigned int n_samples = X.shape[0] cdef unsigned int n_features = X.shape[1] - cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data + # might copy + cdef np.ndarray[DOUBLE, ndim=1] X_data = np.asarray(X.data, + dtype=np.float64) cdef np.ndarray[int, ndim=1] X_indices = X.indices cdef np.ndarray[int, ndim=1] X_indptr = X.indptr @@ -313,7 +315,9 @@ def inplace_csr_row_normalize_l2(X): cdef unsigned int n_samples = X.shape[0] cdef unsigned int n_features = X.shape[1] - cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data + # might copy + cdef np.ndarray[DOUBLE, ndim=1] X_data = np.asarray(X.data, + dtype=np.float64) cdef np.ndarray[int, ndim=1] X_indices = X.indices cdef np.ndarray[int, ndim=1] X_indptr = X.indptr @@ -364,7 +368,7 @@ def assign_rows_csr(X, """Densify selected rows of a CSR matrix into a preallocated array. Like out[out_rows] = X[X_rows].toarray() but without copying. - Only supported for dtype=np.float64. + No-copy only supported for dtype=np.float64. Parameters ---------- @@ -378,7 +382,9 @@ def assign_rows_csr(X, # but int is what scipy.sparse uses. int i, ind, j np.npy_intp rX - np.ndarray[DOUBLE, ndim=1] data = X.data + # might copy + np.ndarray[DOUBLE, ndim=1] data = np.asarray(X.data, + dtype=np.float64) np.ndarray[int, ndim=1] indices = X.indices, indptr = X.indptr if X_rows.shape[0] != out_rows.shape[0]: diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index 9dad43a18e0ca..ce4dfc009c721 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -14,7 +14,9 @@ inplace_swap_row, inplace_swap_column, min_max_axis, count_nonzero, csc_median_axis_0) -from sklearn.utils.sparsefuncs_fast import assign_rows_csr +from sklearn.utils.sparsefuncs_fast import (assign_rows_csr, + inplace_csr_row_normalize_l1, + inplace_csr_row_normalize_l2) from sklearn.utils.testing import assert_raises @@ -478,3 +480,10 @@ def test_csc_row_median(): # Test that it raises an Error for non-csc matrices. assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X)) + + +def test_inplace_normalize(): + # regression tests for passing 32-bit floating point + X = sp.rand(10, 5, dtype=np.float32, density=.5).tocsr() + inplace_csr_row_normalize_l1(X) + inplace_csr_row_normalize_l2(X)