-
-
Notifications
You must be signed in to change notification settings - Fork 25.8k
[MRG + 1] ENH: preprocess: adding a max-normalization option #4695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9e710ed
007ae76
84ee88d
5fcad7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -604,6 +604,55 @@ def test_normalizer_l2(): | |
assert_almost_equal(la.norm(X_norm[3]), 0.0) | ||
|
||
|
||
def test_normalizer_max():
    """Check ``Normalizer(norm='max')`` on dense and sparse inputs.

    Rows 0-2 of the fixture are random, row 3 is all zeros.  After
    max-normalization every non-zero row must have a maximum of 1.0 and the
    zero row must stay at 0.0.  The test also pins the copy semantics:
    ``copy=True`` always returns a new object, ``copy=False`` returns the
    input itself for dense arrays and CSR matrices, and other sparse formats
    are converted to a new CSR matrix even with ``copy=False``.
    """
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sparse.csr_matrix(X_dense)

    # set the row number 3 to zero
    X_dense[3, :] = 0.0

    # set the row number 3 to zero without pruning (can happen in real life)
    indptr_3 = X_sparse_unpruned.indptr[3]
    indptr_4 = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0

    # build the pruned variant using the regular constructor
    X_sparse_pruned = sparse.csr_matrix(X_dense)

    # check inputs that support the no-copy optim
    # Review note (from the PR discussion): the no-copy optimization is
    # verified by the identity assertion on X_norm2 below; the structure of
    # this test was copied from an existing normalizer test.
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):

        # copy=True must run first: the copy=False pass below normalizes X
        # in place.
        normalizer = Normalizer(norm='max', copy=True)
        X_norm1 = normalizer.transform(X)
        assert_true(X_norm1 is not X)
        X_norm1 = toarray(X_norm1)

        normalizer = Normalizer(norm='max', copy=False)
        X_norm2 = normalizer.transform(X)
        assert_true(X_norm2 is X)
        X_norm2 = toarray(X_norm2)

        for X_norm in (X_norm1, X_norm2):
            row_maxs = X_norm.max(axis=1)
            for i in range(3):
                assert_almost_equal(row_maxs[i], 1.0)
            assert_almost_equal(row_maxs[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sparse.coo_matrix, sparse.csc_matrix, sparse.lil_matrix):
        X = init(X_dense)
        # Use the norm under test here ('max', not 'l2').
        X_norm = Normalizer(norm='max', copy=False).transform(X)

        assert_true(X_norm is not X)
        assert_true(isinstance(X_norm, sparse.csr_matrix))

        X_norm = toarray(X_norm)
        # Recompute the row maxima from this result; reusing the values left
        # over from the previous loop would make the assertions vacuous.
        row_maxs = X_norm.max(axis=1)
        for i in range(3):
            assert_almost_equal(row_maxs[i], 1.0)
        assert_almost_equal(row_maxs[3], 0.0)
|
||
|
||
def test_normalize(): | ||
# Test normalize function | ||
# Only tests functionality not used by the tests for Normalizer. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel like we should raise a ValueError in the "else" branch. If you could add that, along with a test, it would be much appreciated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Never mind, missed the check above.