From f1b7a11335b3badee0b7404b427e975d38f5b12c Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Sun, 29 Nov 2015 09:27:50 -0600 Subject: [PATCH 1/3] TST: Regression test for 32-bit input --- sklearn/utils/tests/test_sparsefuncs.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index 9dad43a18e0ca..ce4dfc009c721 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -14,7 +14,9 @@ inplace_swap_row, inplace_swap_column, min_max_axis, count_nonzero, csc_median_axis_0) -from sklearn.utils.sparsefuncs_fast import assign_rows_csr +from sklearn.utils.sparsefuncs_fast import (assign_rows_csr, + inplace_csr_row_normalize_l1, + inplace_csr_row_normalize_l2) from sklearn.utils.testing import assert_raises @@ -478,3 +480,10 @@ def test_csc_row_median(): # Test that it raises an Error for non-csc matrices. assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X)) + + +def test_inplace_normalize(): + # regression tests for passing 32-bit floating point + X = sp.rand(10, 5, dtype=np.float32, density=.5).tocsr() + inplace_csr_row_normalize_l1(X) + inplace_csr_row_normalize_l2(X) From 6a4fa6255dd229cde622de8bad92ac8591add991 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Sat, 28 Nov 2015 11:08:23 -0600 Subject: [PATCH 2/3] ENH: Allow float32 to pass through with copy --- sklearn/utils/sparsefuncs_fast.pyx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index fdf59957f0afe..4a2f7a39af480 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -278,7 +278,9 @@ def inplace_csr_row_normalize_l1(X): cdef unsigned int n_samples = X.shape[0] cdef unsigned int n_features = X.shape[1] - cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data + # might copy + cdef np.ndarray[DOUBLE, ndim=1] X_data = np.asarray(X.data, + dtype=np.float64) cdef np.ndarray[int, ndim=1] X_indices = X.indices cdef np.ndarray[int, ndim=1] X_indptr = X.indptr @@ -313,7 +315,9 @@ def inplace_csr_row_normalize_l2(X): cdef unsigned int n_samples = X.shape[0] cdef unsigned int n_features = X.shape[1] - cdef np.ndarray[DOUBLE, ndim=1] X_data = X.data + # might copy + cdef np.ndarray[DOUBLE, ndim=1] X_data = np.asarray(X.data, + dtype=np.float64) cdef np.ndarray[int, ndim=1] X_indices = X.indices cdef np.ndarray[int, ndim=1] X_indptr = X.indptr @@ -378,7 +382,9 @@ def assign_rows_csr(X, # but int is what scipy.sparse uses. int i, ind, j np.npy_intp rX - np.ndarray[DOUBLE, ndim=1] data = X.data + # might copy + np.ndarray[DOUBLE, ndim=1] data = np.asarray(X.data, + dtype=np.float64) np.ndarray[int, ndim=1] indices = X.indices, indptr = X.indptr if X_rows.shape[0] != out_rows.shape[0]: From fe6b2255bf50093d41b3ee6fffc7470deade05b1 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Sun, 29 Nov 2015 09:28:34 -0600 Subject: [PATCH 3/3] DOC: Clarify doc for copy behavior --- sklearn/utils/sparsefuncs_fast.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index 4a2f7a39af480..aea2730b2864f 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -368,7 +368,7 @@ def assign_rows_csr(X, """Densify selected rows of a CSR matrix into a preallocated array. Like out[out_rows] = X[X_rows].toarray() but without copying. - Only supported for dtype=np.float64. + No-copy only supported for dtype=np.float64. Parameters ----------