scikit-learn · jnothman · Jun 14, 2018 · Jun 11, 2018 · Jun 11, 2018 · Jun 11, 2018
diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
@@ -504,6 +504,14 @@ Preprocessing
   when returning a sparse matrix output. :issue:`11042` by :user:`Daniel
   Morales <DanielMorales9>`.
 
+- Fix ``fit`` and ``partial_fit`` in :class:`preprocessing.StandardScaler` in
+  the rare case when `with_mean=False` and `with_std=False` which was crashing
+  by calling ``fit`` more than once and giving inconsistent results for
+  ``mean_`` whether the input was a sparse or a dense matrix. ``mean_`` will be
+  set to ``None`` with both sparse and dense inputs. ``n_samples_seen_`` will
+  be also reported for both input types.
+  :issue:`11235` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Feature selection
 
 - Fixed computation of ``n_features_to_compute`` for edge case with tied CV

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
@@ -652,6 +652,10 @@ def partial_fit(self, X, y=None):
             else:
                 self.mean_ = None
                 self.var_ = None
+                if not hasattr(self, 'n_samples_seen_'):
+                    self.n_samples_seen_ = X.shape[0]
+                else:
+                    self.n_samples_seen_ += X.shape[0]
         else:
             # First pass
             if not hasattr(self, 'n_samples_seen_'):
@@ -662,9 +666,14 @@ def partial_fit(self, X, y=None):
                 else:
                     self.var_ = None
 
-            self.mean_, self.var_, self.n_samples_seen_ = \
-                _incremental_mean_and_var(X, self.mean_, self.var_,
-                                          self.n_samples_seen_)
+            if not self.with_mean and not self.with_std:
+                self.mean_ = None
+                self.var_ = None
+                self.n_samples_seen_ += X.shape[0]
+            else:
+                self.mean_, self.var_, self.n_samples_seen_ = \
+                    _incremental_mean_and_var(X, self.mean_, self.var_,
+                                              self.n_samples_seen_)
 
         if self.with_std:
             self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_))

diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
@@ -7,6 +7,7 @@
 
 import warnings
 import re
+import itertools
 
 import numpy as np
 import numpy.linalg as la
@@ -60,6 +61,7 @@
 from sklearn.preprocessing.data import power_transform
 from sklearn.exceptions import DataConversionWarning, NotFittedError
 
+from sklearn.base import clone
 from sklearn.pipeline import Pipeline
 from sklearn.model_selection import cross_val_predict
 from sklearn.svm import SVR
@@ -701,6 +703,63 @@ def test_scaler_without_centering():
     assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
 
 
+def _check_identity_scalers_attributes(scaler_1, scaler_2):
+    assert scaler_1.mean_ is scaler_2.mean_ is None
+    assert scaler_1.var_ is scaler_2.var_ is None
+    assert scaler_1.scale_ is scaler_2.scale_ is None
+    assert scaler_1.n_samples_seen_ == scaler_2.n_samples_seen_
+
+
+def test_scaler_return_identity():
+    # test that the scaler return identity when with_mean and with_std are
+    # False
+    X_dense = np.array([[0, 1, 3],
+                        [5, 6, 0],
+                        [8, 0, 10]],
+                       dtype=np.float64)
+    X_csr = sparse.csr_matrix(X_dense)
+    X_csc = X_csr.tocsc()
+
+    transformer_dense = StandardScaler(with_mean=False, with_std=False)
+    X_trans_dense = transformer_dense.fit_transform(X_dense)
+
+    transformer_csr = clone(transformer_dense)
+    X_trans_csr = transformer_csr.fit_transform(X_csr)
+
+    transformer_csc = clone(transformer_dense)
+    X_trans_csc = transformer_csc.fit_transform(X_csc)
+
+    assert_allclose(X_trans_csr.toarray(), X_csr.toarray())
+    assert_allclose(X_trans_csc.toarray(), X_csc.toarray())
+    assert_allclose(X_trans_dense, X_dense)
+
+    for trans_1, trans_2 in itertools.combinations([transformer_dense,
+                                                    transformer_csr,
+                                                    transformer_csc],
+                                                   2):
+        _check_identity_scalers_attributes(trans_1, trans_2)
+
+    transformer_dense.partial_fit(X_dense)
+    transformer_csr.partial_fit(X_csr)
+    transformer_csc.partial_fit(X_csc)
+
+    for trans_1, trans_2 in itertools.combinations([transformer_dense,
+                                                    transformer_csr,
+                                                    transformer_csc],
+                                                   2):
+        _check_identity_scalers_attributes(trans_1, trans_2)
+
+    transformer_dense.fit(X_dense)
+    transformer_csr.fit(X_csr)
+    transformer_csc.fit(X_csc)
+
+    for trans_1, trans_2 in itertools.combinations([transformer_dense,
+                                                    transformer_csr,
+                                                    transformer_csc],
+                                                   2):
+        _check_identity_scalers_attributes(trans_1, trans_2)
+
+
 def test_scaler_int():
     # test that scaler converts integer input to floating
     # for both sparse and dense matrices