Skip to content

[MRG + 1] ENH: preprocess: adding a max-normalization option #4695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 11, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions sklearn/preprocessing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
from ..utils.fixes import isclose
from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
inplace_csr_row_normalize_l2)
from ..utils.sparsefuncs import (inplace_column_scale, mean_variance_axis)
from ..utils.sparsefuncs import (inplace_column_scale, mean_variance_axis,
min_max_axis)
from ..utils.validation import check_is_fitted

zip = six.moves.zip
Expand Down Expand Up @@ -570,7 +571,7 @@ def normalize(X, norm='l2', axis=1, copy=True):
scipy.sparse matrices should be in CSR format to avoid an
un-necessary copy.

norm : 'l1' or 'l2', optional ('l2' by default)
norm : 'l1', 'l2', or 'max', optional ('l2' by default)
The norm to use to normalize each non zero sample (or each non-zero
feature if axis is 0).

Expand All @@ -589,7 +590,7 @@ def normalize(X, norm='l2', axis=1, copy=True):
using the ``Transformer`` API (e.g. as part of a preprocessing
:class:`sklearn.pipeline.Pipeline`)
"""
if norm not in ('l1', 'l2'):
if norm not in ('l1', 'l2', 'max'):
raise ValueError("'%s' is not a supported norm" % norm)

if axis == 0:
Expand All @@ -609,13 +610,19 @@ def normalize(X, norm='l2', axis=1, copy=True):
inplace_csr_row_normalize_l1(X)
elif norm == 'l2':
inplace_csr_row_normalize_l2(X)
elif norm == 'max':
_, norms = min_max_axis(X, 1)
norms = norms.repeat(np.diff(X.indptr))
mask = norms != 0
X.data[mask] /= norms[mask]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like we should raise a ValueError "else". If you could add that and add a test, it would be much appreciated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, missed the check above.

else:
if norm == 'l1':
norms = np.abs(X).sum(axis=1)
norms[norms == 0.0] = 1.0
elif norm == 'l2':
norms = row_norms(X)
norms[norms == 0.0] = 1.0
elif norm == 'max':
norms = np.max(X, axis=1)
norms[norms == 0.0] = 1.0
X /= norms[:, np.newaxis]

if axis == 0:
Expand Down Expand Up @@ -643,7 +650,7 @@ class Normalizer(BaseEstimator, TransformerMixin):

Parameters
----------
norm : 'l1' or 'l2', optional ('l2' by default)
norm : 'l1', 'l2', or 'max', optional ('l2' by default)
The norm to use to normalize each non zero sample.

copy : boolean, optional, default True
Expand Down
49 changes: 49 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,55 @@ def test_normalizer_l2():
assert_almost_equal(la.norm(X_norm[3]), 0.0)


def test_normalizer_max():
    """Check Normalizer(norm='max') on dense and sparse inputs.

    Rows 0-2 must end up with a row-wise max of exactly 1.0; row 3 is
    zeroed out beforehand and must stay all-zero (no division by zero).
    """
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sparse.csr_matrix(X_dense)

    # set the row number 3 to zero
    X_dense[3, :] = 0.0

    # set the row number 3 to zero without pruning (can happen in real life)
    indptr_3 = X_sparse_unpruned.indptr[3]
    indptr_4 = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0

    # build the pruned variant using the regular constructor
    X_sparse_pruned = sparse.csr_matrix(X_dense)

    # check inputs that support the no-copy optim
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):

        normalizer = Normalizer(norm='max', copy=True)
        X_norm1 = normalizer.transform(X)
        assert_true(X_norm1 is not X)
        X_norm1 = toarray(X_norm1)

        # copy=False must return the very same object (no-copy optimization)
        normalizer = Normalizer(norm='max', copy=False)
        X_norm2 = normalizer.transform(X)
        assert_true(X_norm2 is X)
        X_norm2 = toarray(X_norm2)

        for X_norm in (X_norm1, X_norm2):
            row_maxs = X_norm.max(axis=1)
            for i in range(3):
                assert_almost_equal(row_maxs[i], 1.0)
            assert_almost_equal(row_maxs[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sparse.coo_matrix, sparse.csc_matrix, sparse.lil_matrix):
        X = init(X_dense)
        # was norm='l2' with a spurious chained assignment
        # (X_norm = normalizer = ...); this test must exercise norm='max'
        X_norm = Normalizer(norm='max', copy=False).transform(X)

        # non-CSR input is converted, so copy=False still yields a new object
        assert_true(X_norm is not X)
        assert_true(isinstance(X_norm, sparse.csr_matrix))

        X_norm = toarray(X_norm)
        # recompute row maxima for THIS result; the previous loop's
        # row_maxs would make these assertions vacuous
        row_maxs = X_norm.max(axis=1)
        for i in range(3):
            assert_almost_equal(row_maxs[i], 1.0)
        assert_almost_equal(row_maxs[3], 0.0)


def test_normalize():
# Test normalize function
# Only tests functionality not used by the tests for Normalizer.
Expand Down