File tree: 2 files changed, +12 -9 lines changed
Original file line number | Diff line number | Diff line change
@@ -107,3 +107,15 @@ def test_hasher_zeros():
107
107
# Assert that no zeros are materialized in the output.
108
108
X = FeatureHasher ().transform ([{'foo' : 0 }])
109
109
assert_equal (X .data .shape , (0 ,))
110
+
111
+
112
+ def test_hash_collision ():
113
+ # Ensure that hash collision does not produce zero elements
114
+ # in the output sparse array (issue #3637)
115
+ raw_X = ["ab" , "ac" , "ad" , "ae" ]
116
+ fh = FeatureHasher (non_negative = False , input_type = "string" , n_features = 1 )
117
+ X = fh .transform (raw_X )
118
+ assert_true (len (X .data ) <= 2 * len (raw_X )) # we have feature collisions
119
+ assert_equal ((X .data == 0. ).sum (), 0 )
120
+
121
+
Original file line number | Diff line number | Diff line change
@@ -944,15 +944,6 @@ def func():
944
944
assert_raise_message (exception , message , func )
945
945
946
946
947
- def test_hashingvectorizer_hash_collision ():
948
- # Ensure that hash collision does not produce zero elements
949
- # in the output sparse array (issue #3637)
950
- text = 'investigation need records'
951
- hv = HashingVectorizer (ngram_range = (1 , 2 ), non_negative = True )
952
- X = hv .transform ([text ])
953
- assert_equal ((X .data == 0. ).sum (), 0 )
954
-
955
-
956
947
def test_tfidfvectorizer_binary ():
957
948
# Non-regression test: TfidfVectorizer used to ignore its "binary" param.
958
949
v = TfidfVectorizer (binary = True , use_idf = False , norm = None )
You can’t perform that action at this time.
0 commit comments