Fix requires_fit tag for stateless FeatureHasher and HashingVectorizer (#31851)

hqkqn32 · jeremiedbb · web-flow · commit 52d93e141a5d · 2025-08-04T10:41:00.000Z
Co-authored-by: Jérémie du Boisberranger <jeremie@probabl.ai>
diff --git a/doc/whats_new/upcoming_changes/sklearn.feature_extraction/31851.fix.rst b/doc/whats_new/upcoming_changes/sklearn.feature_extraction/31851.fix.rst
@@ -0,0 +1,4 @@
+- Set the tag `requires_fit=False` for the classes
+  :class:`feature_extraction.FeatureHasher` and
+  :class:`feature_extraction.HashingVectorizer`.
+  By :user:`hakan çanakcı <hqkqn32>`.
diff --git a/sklearn/feature_extraction/_hash.py b/sklearn/feature_extraction/_hash.py
@@ -204,4 +204,5 @@ def __sklearn_tags__(self):
             tags.input_tags.string = True
         elif self.input_type == "dict":
             tags.input_tags.dict = True
+        tags.requires_fit = False
         return tags
diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py
@@ -158,3 +158,18 @@ def test_hash_collisions():
         alternate_sign=False, n_features=1, input_type="string"
     ).fit_transform(X)
     assert Xt.data[0] == len(X[0])
+
+
+def test_feature_hasher_requires_fit_tag():
+    """Test that FeatureHasher has requires_fit=False tag."""
+    hasher = FeatureHasher()
+    tags = hasher.__sklearn_tags__()
+    assert not tags.requires_fit
+
+
+def test_feature_hasher_transform_without_fit():
+    """Test that FeatureHasher can transform without fitting."""
+    hasher = FeatureHasher(n_features=10)
+    data = [{"dog": 1, "cat": 2}, {"dog": 2, "run": 5}]
+    result = hasher.transform(data)
+    assert result.shape == (2, 10)
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
@@ -1626,3 +1626,18 @@ def test_tfidf_vectorizer_perserve_dtype_idf(dtype):
     X = [str(uuid.uuid4()) for i in range(100_000)]
     vectorizer = TfidfVectorizer(dtype=dtype).fit(X)
     assert vectorizer.idf_.dtype == dtype
+
+
+def test_hashing_vectorizer_requires_fit_tag():
+    """Test that HashingVectorizer has requires_fit=False tag."""
+    vectorizer = HashingVectorizer()
+    tags = vectorizer.__sklearn_tags__()
+    assert not tags.requires_fit
+
+
+def test_hashing_vectorizer_transform_without_fit():
+    """Test that HashingVectorizer can transform without fitting."""
+    vectorizer = HashingVectorizer(n_features=10)
+    corpus = ["This is test", "Another test"]
+    result = vectorizer.transform(corpus)
+    assert result.shape == (2, 10)
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
@@ -923,6 +923,7 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.string = True
         tags.input_tags.two_d_array = False
+        tags.requires_fit = False
         return tags