From e41d5f4ec1c49b8b650bf510a1bae53e069cb8ee Mon Sep 17 00:00:00 2001 From: Yusuf Raji Date: Thu, 23 Feb 2023 20:40:12 +0100 Subject: [PATCH 1/2] Specify behaviour of None for TfidfVectorizer max_features --- sklearn/feature_extraction/text.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 72f836c25839f..9b413b2fab65d 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -1874,6 +1874,7 @@ class TfidfVectorizer(CountVectorizer): max_features : int, default=None If not None, build a vocabulary that only consider the top max_features ordered by term frequency across the corpus. + Otherwise, all features are used. This parameter is ignored if vocabulary is not None. From 2e61d96467094b9148a1985fcc7454307cfc781a Mon Sep 17 00:00:00 2001 From: Yusuf Raji Date: Thu, 23 Feb 2023 21:19:06 +0100 Subject: [PATCH 2/2] Update sklearn/feature_extraction/text.py Co-authored-by: Guillaume Lemaitre --- sklearn/feature_extraction/text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 9b413b2fab65d..c2a99a4583bbf 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -1873,7 +1873,7 @@ class TfidfVectorizer(CountVectorizer): max_features : int, default=None If not None, build a vocabulary that only consider the top - max_features ordered by term frequency across the corpus. + `max_features` ordered by term frequency across the corpus. Otherwise, all features are used. This parameter is ignored if vocabulary is not None.