
[MRG+1] BUG: reset internal state of scaler before fitting #5416

Merged: 1 commit, merged on Oct 16, 2015.

58 changes: 58 additions & 0 deletions sklearn/preprocessing/data.py
@@ -252,15 +252,36 @@ def data_range(self):
def data_min(self):
return self.data_min_

def _reset(self):
"""Reset internal data-dependent state of the scaler, if necessary.

__init__ parameters are not touched.
"""

# Checking one attribute is enough, because they are all set together
# in partial_fit
if hasattr(self, 'scale_'):
del self.scale_
del self.min_
del self.n_samples_seen_
del self.data_min_
del self.data_max_
del self.data_range_

Member commented:
Is it worth trying to write slightly more generic code, something like:

attributes = [a for a in dir(self) if a.endswith('_')]

for attr in attributes:
    delattr(self, attr)

Member replied:
could go into BaseEstimator even. but not now ;)
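
For anyone applying that generic idea literally: dir() also lists dunder names such as __init__ (which end in an underscore too) and class-level properties such as the deprecated std_, neither of which should be deleted. A minimal sketch with those guards, iterating only over attributes actually stored on the instance; the name _reset_generic is made up for the sketch, and this is purely illustrative, not part of this PR:

    def _reset_generic(self):
        # Fitted attributes follow the scikit-learn convention of a
        # trailing underscore. Iterating over vars(self) touches only
        # instance attributes, so dunders and class-level properties
        # (e.g. the deprecated std_) are never hit, and __init__
        # parameters like copy do not end with '_' and stay untouched.
        for attr in list(vars(self)):
            if attr.endswith('_') and not attr.startswith('__'):
                delattr(self, attr)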


def fit(self, X, y=None):
"""Compute the minimum and maximum to be used for later scaling.

It always resets the object's internal state first.

Parameters
----------
X : array-like, shape [n_samples, n_features]
The data used to compute the per-feature minimum and maximum
used for later scaling along the features axis.
"""

# Reset internal state before fitting
self._reset()
return self.partial_fit(X, y)

def partial_fit(self, X, y=None):
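
To make the MinMaxScaler change concrete: fit used to delegate straight to partial_fit, so refitting on data with a different number of features collided with the stale per-feature state. A small sketch of the behavior this patch establishes, written against this branch:

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    rng = np.random.RandomState(0)
    scaler = MinMaxScaler()
    scaler.fit(rng.rand(10, 4))    # stores per-feature state for 4 features
    # fit() now calls _reset() first, so this refit starts from scratch
    # instead of mixing 2-feature statistics into 4-feature state.
    scaler.fit(rng.rand(10, 2))
    print(scaler.data_min_.shape)  # (2,)
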
@@ -489,9 +510,25 @@ def __init__(self, copy=True, with_mean=True, with_std=True):
def std_(self):
return self.scale_

def _reset(self):
"""Reset internal data-dependent state of the scaler, if necessary.

__init__ parameters are not touched.
"""

# Checking one attribute is enough, because they are all set together
# in partial_fit
if hasattr(self, 'scale_'):
del self.scale_
del self.n_samples_seen_
del self.mean_
del self.var_

def fit(self, X, y=None):
"""Compute the mean and std to be used for later scaling.

It always resets the object's internal state first.

Parameters
----------
X : {array-like, sparse matrix}, shape [n_samples, n_features]
@@ -500,6 +537,9 @@ def fit(self, X, y=None):

y: Passthrough for ``Pipeline`` compatibility.
"""

# Reset internal state before fitting
self._reset()
return self.partial_fit(X, y)

def partial_fit(self, X, y=None):
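
The same pattern in StandardScaler also pins down the fit/partial_fit contract: partial_fit accumulates statistics across calls, while fit now discards them first. A sketch of the expected difference, relying on the n_samples_seen_ counter this estimator maintains:

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    X1 = np.random.rand(60, 3)
    X2 = np.random.rand(40, 3)

    inc = StandardScaler()
    inc.partial_fit(X1)
    inc.partial_fit(X2)
    # partial_fit keeps accumulating: statistics cover all 100 samples.
    assert inc.n_samples_seen_ == 100

    cold = StandardScaler()
    cold.fit(X1)
    cold.fit(X2)
    # fit resets first, so only the last batch of 40 samples counts.
    assert cold.n_samples_seen_ == 40
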
@@ -671,15 +711,33 @@ class MaxAbsScaler(BaseEstimator, TransformerMixin):
def __init__(self, copy=True):
self.copy = copy

def _reset(self):
"""Reset internal data-dependent state of the scaler, if necessary.

__init__ parameters are not touched.
"""

# Checking one attribute is enough, because they are all set together
# in partial_fit
if hasattr(self, 'scale_'):
del self.scale_
del self.n_samples_seen_
del self.max_abs_

def fit(self, X, y=None):
"""Compute the maximum absolute value to be used for later scaling.

It always resets the object's internal state first.

Parameters
----------
X : {array-like, sparse matrix}, shape [n_samples, n_features]
The data used to compute the per-feature maximum absolute value
used for later scaling along the features axis.
"""

# Reset internal state before fitting
self._reset()

Member commented:
argh I didn't pay attention to this before.

return self.partial_fit(X, y)

def partial_fit(self, X, y=None):
16 changes: 16 additions & 0 deletions sklearn/preprocessing/tests/test_data.py
@@ -1498,3 +1498,19 @@ def test_one_hot_encoder_unknown_transform():
oh = OneHotEncoder(handle_unknown='42')
oh.fit(X)
assert_raises(ValueError, oh.transform, y)


def test_fit_cold_start():
X = iris.data
X_2d = X[:, :2]

# Scalers that have a partial_fit method
scalers = [StandardScaler(with_mean=False, with_std=False),
MinMaxScaler(),
MaxAbsScaler()]

for scaler in scalers:
scaler.fit_transform(X)
# with a different shape, this may break the scaler unless the internal
# state is reset
scaler.fit_transform(X_2d)
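
Before this fix, the second fit_transform above would fold 2-feature batch statistics into state stored for 4 features, typically surfacing as a NumPy broadcasting error. On releases without the fix, refitting a fresh clone instead of reusing the fitted instance gives the same cold start; a sketch, reusing X and X_2d from the test:

    from sklearn.base import clone

    scaler = MinMaxScaler()
    scaler.fit_transform(X)
    # clone() copies the constructor parameters but none of the fitted
    # state, so the second fit starts cold even without _reset.
    scaler = clone(scaler)
    scaler.fit_transform(X_2d)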