scikit-learn · ogrisel · Jun 1, 2021 · Jun 1, 2021 · Jun 1, 2021 · Jun 1, 2021
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
@@ -504,15 +504,6 @@ def test_vectorizer():
     with pytest.raises(ValueError):
         t3.transform(counts_train)
 
-    # test idf transform with incompatible n_features
-    X = [[1, 1, 5],
-         [1, 1, 0]]
-    t3.fit(X)
-    X_incompt = [[1, 3],
-                 [1, 3]]
-    with pytest.raises(ValueError):
-        t3.transform(X_incompt)
-
     # L1-normalized term frequencies sum to one
     assert_array_almost_equal(np.sum(tf, axis=1), [1.0] * n_train)
 

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
@@ -1386,6 +1386,11 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
 
         .. versionadded:: 0.20
 
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 1.0
+
     Examples
     --------
     >>> from sklearn.feature_extraction.text import TfidfTransformer
@@ -1436,7 +1441,7 @@ def fit(self, X, y=None):
         X : sparse matrix of shape (n_samples, n_features)
             A matrix of term/token counts.
         """
-        X = check_array(X, accept_sparse=('csr', 'csc'))
+        X = self._validate_data(X, accept_sparse=('csr', 'csc'))
         if not sp.issparse(X):
             X = sp.csr_matrix(X)
         dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64
@@ -1476,7 +1481,8 @@ def transform(self, X, copy=True):
         -------
         vectors : sparse matrix of shape (n_samples, n_features)
         """
-        X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES, copy=copy)
+        X = self._validate_data(X, accept_sparse='csr',
+                                dtype=FLOAT_DTYPES, copy=copy, reset=False)
         if not sp.issparse(X):
             X = sp.csr_matrix(X, dtype=np.float64)
 
@@ -1493,11 +1499,6 @@ def transform(self, X, copy=True):
             check_is_fitted(self, attributes=["idf_"],
                             msg='idf vector is not fitted')
 
-            expected_n_features = self._idf_diag.shape[0]
-            if n_features != expected_n_features:
-                raise ValueError("Input has n_features=%d while the model"
-                                 " has been trained with n_features=%d" % (
-                                     n_features, expected_n_features))
             # *= doesn't work
             X = X * self._idf_diag
 

diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
@@ -262,7 +262,6 @@ def test_search_cv(estimator, check, request):
 # check_classifiers_train would need to be updated with the error message
 N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = {
     'compose',
-    'feature_extraction',
     'model_selection',
     'multiclass',
     'multioutput',

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
@@ -3121,7 +3121,8 @@ def check_n_features_in_after_fitting(name, estimator_orig):
     # Make sure that n_features_in are checked after fitting
     tags = _safe_tags(estimator_orig)
 
-    if "2darray" not in tags["X_types"] or tags["no_validation"]:
+    if ("2darray" not in tags["X_types"] and "sparse" not in tags["X_types"] or
+            tags["no_validation"]):
         return
 
     rng = np.random.RandomState(0)