@@ -299,7 +299,7 @@ def n_nonzero_columns(X):
 #
 # Now we make a similar experiment with the
 # :func:`~sklearn.feature_extraction.text.HashingVectorizer`, which is
-# equivalent to combining the “hashing trick” implemented by the
+# equivalent to combining the "hashing trick" implemented by the
 # :func:`~sklearn.feature_extraction.FeatureHasher` class and the text
 # preprocessing and tokenization of the
 # :func:`~sklearn.feature_extraction.text.CountVectorizer`.
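
As a side note (not part of the diff): the equivalence this comment describes can be checked directly. The sketch below uses an invented two-document corpus and a deliberately small `n_features`; `norm=None` disables the normalization that `HashingVectorizer` applies by default, so the two outputs match exactly.

from sklearn.feature_extraction import FeatureHasher
from sklearn.feature_extraction.text import CountVectorizer, HashingVectorizer

corpus = ["The quick brown fox", "jumped over the lazy dog"]  # toy data

# HashingVectorizer hashes tokens straight into a fixed-size feature space,
# so no vocabulary has to be kept in memory.
X_hash = HashingVectorizer(n_features=2**8, norm=None).transform(corpus)

# The same computation spelled out: CountVectorizer's preprocessing and
# tokenization feeding token streams into FeatureHasher.
analyze = CountVectorizer().build_analyzer()
hasher = FeatureHasher(n_features=2**8, input_type="string")
X_manual = hasher.transform(analyze(doc) for doc in corpus)

print((X_hash - X_manual).nnz)  # 0: the two sparse matrices are identical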
@@ -322,15 +322,15 @@ def n_nonzero_columns(X):
 # TfidfVectorizer
 # ---------------
 #
-# In a large text corpus, some words appear with higher frequency (e.g. “the”,
-# “a”, “is” in English) and do not carry meaningful information about the actual
+# In a large text corpus, some words appear with higher frequency (e.g. "the",
+# "a", "is" in English) and do not carry meaningful information about the actual
 # contents of a document. If we were to feed the word count data directly to a
 # classifier, those very common terms would shadow the frequencies of rarer yet
 # more informative terms. In order to re-weight the count features into floating
 # point values suitable for usage by a classifier it is very common to use the
-# tf–idf transform as implemented by the
+# tf-idf transform as implemented by the
 # :func:`~sklearn.feature_extraction.text.TfidfTransformer`. TF stands for
-# "term-frequency" while "tf–idf" means term-frequency times inverse
+# "term-frequency" while "tf-idf" means term-frequency times inverse
 # document-frequency.
 #
 # We now benchmark the :func:`~sklearn.feature_extraction.text.TfidfVectorizer`,
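
Likewise not part of the diff: a minimal sketch of the re-weighting described above, on an invented three-document corpus. `TfidfVectorizer` is equivalent to a `CountVectorizer` followed by a `TfidfTransformer`.

from sklearn.feature_extraction.text import TfidfVectorizer

corpus = [  # toy data for illustration only
    "the cat sat on the mat",
    "the dog chased the cat",
    "the bird flew over the house",
]

vectorizer = TfidfVectorizer()
vectorizer.fit_transform(corpus)

# "the" occurs in every document, so its inverse document frequency is the
# smallest possible and its counts are discounted relative to rarer terms.
for term, idf in zip(vectorizer.get_feature_names_out(), vectorizer.idf_):
    print(f"{term}: idf={idf:.2f}")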