From 44238a8e1e31969d65e2759f8088b025dbb40c44 Mon Sep 17 00:00:00 2001 From: rprkh Date: Thu, 26 Jan 2023 20:47:19 +0530 Subject: [PATCH 1/2] max_df range --- sklearn/feature_extraction/text.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index c03a4767a3330..ad83a134e28c8 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -996,8 +996,8 @@ class CountVectorizer(_VectorizerMixin, BaseEstimator): will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. - If None, no stop words will be used. max_df can be set to a value - in the range [0.7, 1.0) to automatically detect and filter stop + If None, no stop words will be used. ``max_df`` can be set to a value + in the range [0.0, 1.0] to automatically detect and filter stop words based on intra corpus document frequency of terms. token_pattern : str or None, default=r"(?u)\\b\\w\\w+\\b" Regular expression denoting what constitutes a "token", only used From a96b911105af5ae45276b60dd814091b51e2da5b Mon Sep 17 00:00:00 2001 From: Rahil Parikh Date: Thu, 9 Feb 2023 18:01:34 +0530 Subject: [PATCH 2/2] add suggestions --- sklearn/feature_extraction/text.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index ad83a134e28c8..72f836c25839f 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -996,9 +996,9 @@ class CountVectorizer(_VectorizerMixin, BaseEstimator): will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. - If None, no stop words will be used. ``max_df`` can be set to a value - in the range [0.0, 1.0] to automatically detect and filter stop - words based on intra corpus document frequency of terms. + If None, no stop words will be used. In this case, setting ``max_df`` to a higher value, such as in the range (0.7, 1.0), can automatically detect and filter stop words based on intra corpus document frequency of terms. token_pattern : str or None, default=r"(?u)\\b\\w\\w+\\b" Regular expression denoting what constitutes a "token", only used @@ -1833,9 +1833,9 @@ class TfidfVectorizer(CountVectorizer): will be removed from the resulting tokens. Only applies if ``analyzer == 'word'``. - If None, no stop words will be used. max_df can be set to a value - in the range [0.7, 1.0) to automatically detect and filter stop - words based on intra corpus document frequency of terms. + If None, no stop words will be used. In this case, setting ``max_df`` to a higher value, such as in the range (0.7, 1.0), can automatically detect and filter stop words based on intra corpus document frequency of terms. token_pattern : str, default=r"(?u)\\b\\w\\w+\\b" Regular expression denoting what constitutes a "token", only used