
[MRG + 1] Add test for __dict__ #7553


Merged: 3 commits merged on Nov 5, 2016
9 changes: 8 additions & 1 deletion doc/whats_new.rst
@@ -65,10 +65,14 @@ Enhancements
- Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`.
:issue:`7723` by `Mikhail Korobov`_.

- ``check_estimator`` now attempts to ensure that methods transform, predict, etc.
do not set attributes on the estimator.
:issue:`7533` by `Ekaterina Krivich`_.

Bug fixes
.........

- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not
exactly implement the Benjamini-Hochberg procedure. It formerly may have
selected fewer features than it should.
:issue:`7490` by `Peng Meng`_.
@@ -88,6 +92,9 @@ Bug fixes
- Tree splitting criterion classes' cloning/pickling is now memory safe
:issue:`7680` by `Ibraim Ganiev`_.

- Fixed a bug where :class:`decomposition.NMF` set its ``n_iter_``
attribute in ``transform()``. :issue:`7553` by `Ekaterina Krivich`_.

.. _changes_0_18_1:

Version 0.18.1
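
For context, a minimal sketch (not part of this PR; ``LeakyPredictor`` is a hypothetical name used only for illustration) of the kind of violation the new check is meant to catch: an estimator whose predict mutates its __dict__ after fitting.

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.estimator_checks import check_estimator


class LeakyPredictor(BaseEstimator):
    """Hypothetical estimator used only to illustrate the new check."""

    def fit(self, X, y=None):
        self.mean_ = np.asarray(X, dtype=float).mean()
        return self

    def predict(self, X):
        # Bug: writing to self during predict changes __dict__ after fit,
        # which the new check_dict_unchanged check reports.
        self.last_n_samples_ = len(X)
        return np.full(len(X), self.mean_)


# Expected to fail check_estimator with an AssertionError along the lines of
# "Estimator changes __dict__ during predict" (it may trip other checks too).
# check_estimator(LeakyPredictor)
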
22 changes: 0 additions & 22 deletions sklearn/decomposition/nmf.py
@@ -1016,14 +1016,6 @@ def fit_transform(self, X, y=None, W=None, H=None):
H : array-like, shape (n_components, n_features)
If init='custom', it is used as initial guess for the solution.

Attributes
----------
components_ : array-like, shape (n_components, n_features)
Factorization matrix, sometimes called 'dictionary'.

n_iter_ : int
Actual number of iterations for the transform.

Returns
-------
W: array, shape (n_samples, n_components)
@@ -1061,14 +1053,6 @@ def fit(self, X, y=None, **params):
X: {array-like, sparse matrix}, shape (n_samples, n_features)
Data matrix to be decomposed

Attributes
----------
components_ : array-like, shape (n_components, n_features)
Factorization matrix, sometimes called 'dictionary'.

n_iter_ : int
Actual number of iterations for the transform.

Returns
-------
self
@@ -1084,11 +1068,6 @@ def transform(self, X):
X: {array-like, sparse matrix}, shape (n_samples, n_features)
Data matrix to be transformed by the model

Attributes
----------
n_iter_ : int
Actual number of iterations for the transform.

Returns
-------
W: array, shape (n_samples, n_components)
@@ -1106,7 +1085,6 @@ def transform(self, X):
nls_max_iter=self.nls_max_iter, sparseness=self.sparseness,
beta=self.beta, eta=self.eta)

self.n_iter_ = n_iter_
return W

def inverse_transform(self, W):
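
The fix stops transform() from writing ``n_iter_``. As a point of reference, a simplified sketch (a hypothetical ``ToyFactorizer``, not the real NMF code) of the convention the change restores: fitted attributes with a trailing underscore are assigned in fit/fit_transform only, and transform only reads estimator state.

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin


class ToyFactorizer(BaseEstimator, TransformerMixin):
    """Hypothetical illustration of the fit/transform attribute convention."""

    def fit_transform(self, X, y=None):
        X = np.asarray(X, dtype=float)
        W, n_iter = X[:, :1].copy(), 10          # stand-ins for a real solver
        self.components_ = np.ones((1, X.shape[1]))
        self.n_iter_ = n_iter                    # fitted state is set here ...
        return W

    def fit(self, X, y=None):
        self.fit_transform(X)
        return self

    def transform(self, X):
        X = np.asarray(X, dtype=float)
        # ... but never here: transform must not modify self.__dict__
        return X[:, :1].copy()
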
45 changes: 45 additions & 0 deletions sklearn/utils/estimator_checks.py
@@ -30,6 +30,7 @@
from sklearn.utils.testing import SkipTest
from sklearn.utils.testing import ignore_warnings
from sklearn.utils.testing import assert_warns
from sklearn.utils.testing import assert_dict_equal


from sklearn.base import (clone, ClassifierMixin, RegressorMixin,
@@ -230,6 +231,7 @@ def _yield_all_checks(name, Estimator):
yield check_fit1d_1feature
yield check_fit1d_1sample
yield check_get_params_invariance
yield check_dict_unchanged


def check_estimator(Estimator):
@@ -409,6 +411,49 @@ def check_dtype_object(name, Estimator):


@ignore_warnings
def check_dict_unchanged(name, Estimator):
# this estimator raises
# ValueError: Found array with 0 feature(s) (shape=(23, 0))
# while a minimum of 1 is required.
# error
if name in ['SpectralCoclustering']:
return
rnd = np.random.RandomState(0)
if name in ['RANSACRegressor']:
X = 3 * rnd.uniform(size=(20, 3))
else:
X = 2 * rnd.uniform(size=(20, 3))
Member:
We can't use 3 * in all cases?

Contributor Author:
If I use 3 * for all, I get ValueError: _BinaryGaussianProcessClassifierLaplace supports only binary classification. y contains classes [0 1 2]. Which sounds fair enough.

With 2 * for all, on the other hand, I get ValueError: No inliers found, possible cause is setting residual_threshold (None) too low. for RANSACRegressor :(
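
To make the scaling point concrete, a small illustrative snippet (not part of the PR): y is built as X[:, 0].astype(int), so the multiplier controls which integer classes can appear.

import numpy as np

rnd = np.random.RandomState(0)
X2 = 2 * rnd.uniform(size=(20, 3))  # first column in [0, 2)
X3 = 3 * rnd.uniform(size=(20, 3))  # first column in [0, 3)
print(np.unique(X2[:, 0].astype(int)))  # typically [0 1]
print(np.unique(X3[:, 0].astype(int)))  # typically [0 1 2], which binary-only estimators reject
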


y = X[:, 0].astype(np.int)
y = multioutput_estimator_convert_y_2d(name, y)
estimator = Estimator()
set_testing_parameters(estimator)
if hasattr(estimator, "n_components"):
Member:
Hmm... I wonder if these should be in set_testing_parameters.

Contributor Author:
Can't say anything here; I've seen the same pattern in several places, for example here: https://github.com/kiote/scikit-learn/blob/feature-test-__dict__/sklearn/utils/estimator_checks.py#L443

So it looks like it could be moved there; do you think it should be?

Member:
yeah feel free to move it there.

estimator.n_components = 1

if hasattr(estimator, "n_clusters"):
estimator.n_clusters = 1

if hasattr(estimator, "n_best"):
estimator.n_best = 1

set_random_state(estimator, 1)

# should be just `estimator.fit(X, y)`
# after merging #6141
if name in ['SpectralBiclustering']:
Member:
SpectralBiclustering doesn't take y? I guess that's fixed in #6141.

Contributor Author (@kiote, Oct 18, 2016):

yep! those changes helped (I merged with maniteja123:issue6126 branch to check), so I suppose I could remove this after #6141 merge, right?

estimator.fit(X)
else:
estimator.fit(X, y)
for method in ["predict", "transform", "decision_function",
"predict_proba"]:
if hasattr(estimator, method):
dict_before = estimator.__dict__.copy()
getattr(estimator, method)(X)
assert_dict_equal(estimator.__dict__, dict_before,
'Estimator changes __dict__ during %s' % method)


def check_fit2d_predict1d(name, Estimator):
# check by fitting a 2d array and predicting with a 1d array
rnd = np.random.RandomState(0)
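
Following up on the review thread above about set_testing_parameters, a hypothetical sketch (not part of this PR) of what moving the parameter defaults there could look like; the real helper contains further adjustments that are elided here.

# Hypothetical follow-up: fold the defaults used by check_dict_unchanged
# into set_testing_parameters so every check shares them.
def set_testing_parameters(estimator):
    # ... the existing speed/deprecation-related adjustments stay here ...
    if hasattr(estimator, "n_components"):
        estimator.n_components = 1
    if hasattr(estimator, "n_clusters"):
        estimator.n_clusters = 1
    if hasattr(estimator, "n_best"):
        estimator.n_best = 1
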
19 changes: 19 additions & 0 deletions sklearn/utils/tests/test_estimator_checks.py
@@ -28,6 +28,20 @@ def predict(self, X):
return np.ones(X.shape[0])


class ChangesDict(BaseEstimator):
def __init__(self):
self.key = 0

def fit(self, X, y=None):
X, y = check_X_y(X, y)
return self

def predict(self, X):
X = check_array(X)
self.key = 1000
return np.ones(X.shape[0])


class NoCheckinPredict(BaseBadClassifier):
def fit(self, X, y):
X, y = check_X_y(X, y)
@@ -75,6 +89,11 @@ def test_check_estimator():
# check that predict does input validation (doesn't accept dicts in input)
msg = "Estimator doesn't check for NaN and inf in predict"
assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict)
# check that estimator state does not change
# at transform/predict/predict_proba time
msg = 'Estimator changes __dict__ during predict'
assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict)

# check for sparse matrix input handling
name = NoSparseClassifier.__name__
msg = "Estimator " + name + " doesn't seem to fail gracefully on sparse data"