From f6c67e16e4edf26d073e04b93880d04eaf0d3079 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Moreyra?= <tomasmoreyra@gmail.com>
Date: Sat, 26 Jun 2021 15:15:00 -0300
Subject: [PATCH 1/6] Remove TfidfTransformer from DOCSTRING_IGNORE_LIST.

---
 maint_tools/test_docstrings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index cba5e8dfd2900..a8ceb36e9aa7e 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -202,7 +202,6 @@
     "StackingRegressor",
     "StandardScaler",
     "TSNE",
-    "TfidfTransformer",
     "TfidfVectorizer",
     "TheilSenRegressor",
     "TransformedTargetRegressor",

From e187f3c465df5737120f551c6cdbc5d146070db5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Moreyra?= <tomasmoreyra@gmail.com>
Date: Sat, 26 Jun 2021 15:16:05 -0300
Subject: [PATCH 2/6] Fix numpydocs from TfidfTransformer.

---
 sklearn/feature_extraction/text.py | 55 ++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 0ae1956bef555..16a47a79e0426 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1395,7 +1395,7 @@ def _make_int_array():
 
 
 class TfidfTransformer(TransformerMixin, BaseEstimator):
-    """Transform a count matrix to a normalized tf or tf-idf representation
+    """Transform a count matrix to a normalized tf or tf-idf representation.
 
     Tf means term-frequency while tf-idf means term-frequency times inverse
     document-frequency. This is a common term weighting scheme in information
@@ -1445,7 +1445,7 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
         similarity between two vectors is their dot product when l2 norm has
         been applied.
         * 'l1': Sum of absolute values of vector elements is 1.
-        See :func:`preprocessing.normalize`
+        See :func:`preprocessing.normalize`.
 
     use_idf : bool, default=True
         Enable inverse-document-frequency reweighting.
@@ -1471,6 +1471,26 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
 
         .. versionadded:: 1.0
 
+    See Also
+    --------
+    CountVectorizer : Transforms text into a sparse matrix of n-gram counts.
+
+    TfidfVectorizer : Convert a collection of raw documents to a matrix of
+        TF-IDF features.
+
+    HashingVectorizer : Convert a collection of text documents to a matrix
+        of token occurrences.
+
+    References
+    ----------
+
+    .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern
+                   Information Retrieval. Addison Wesley, pp. 68-74.
+
+    .. [MRS2008] C.D. Manning, P. Raghavan and H. Schütze (2008).
+                   Introduction to Information Retrieval. Cambridge University
+                   Press, pp. 118-120.
+
     Examples
     --------
     >>> from sklearn.feature_extraction.text import TfidfTransformer
@@ -1495,16 +1515,6 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
            1.        , 1.91629073, 1.91629073])
     >>> pipe.transform(corpus).shape
     (4, 8)
-
-    References
-    ----------
-
-    .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern
-                   Information Retrieval. Addison Wesley, pp. 68-74.
-
-    .. [MRS2008] C.D. Manning, P. Raghavan and H. Schütze  (2008).
-                   Introduction to Information Retrieval. Cambridge University
-                   Press, pp. 118-120.
     """
 
     def __init__(self, *, norm="l2", use_idf=True, smooth_idf=True, sublinear_tf=False):
@@ -1520,6 +1530,14 @@ def fit(self, X, y=None):
         ----------
         X : sparse matrix of shape n_samples, n_features)
             A matrix of term/token counts.
+
+        y : None
+            This parameter is not needed to compute tfidf.
+
+        Returns
+        -------
+        self : object
+            Fitted transformer.
         """
         X = self._validate_data(X, accept_sparse=("csr", "csc"))
         if not sp.issparse(X):
@@ -1549,12 +1567,12 @@ def fit(self, X, y=None):
         return self
 
     def transform(self, X, copy=True):
-        """Transform a count matrix to a tf or tf-idf representation
+        """Transform a count matrix to a tf or tf-idf representation.
 
         Parameters
         ----------
         X : sparse matrix of (n_samples, n_features)
-            a matrix of term/token counts
+            A matrix of term/token counts.
 
         copy : bool, default=True
             Whether to copy X and operate on the copy or perform in-place
@@ -1563,6 +1581,7 @@ def transform(self, X, copy=True):
         Returns
         -------
         vectors : sparse matrix of shape (n_samples, n_features)
+            Tf-idf-weighted document-term matrix.
         """
         X = self._validate_data(
             X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy, reset=False
@@ -1590,6 +1609,14 @@ def transform(self, X, copy=True):
 
     @property
     def idf_(self):
+        """Returns the inverse document frecuency (IDF) vector.
+
+        Returns
+        -------
+        idf_ : array of shape (n_features)
+            The inverse document frequency (IDF) vector; only defined
+            if  ``use_idf`` is True.
+        """
         # if _idf_diag is not set, this will raise an attribute error,
         # which means hasattr(self, "idf_") is False
         return np.ravel(self._idf_diag.sum(axis=0))

From 1e6f8a7da070f6d59c7ffd371179d0ccb89fc944 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Moreyra?= <tomas.moreyra@mercadolibre.com>
Date: Sat, 26 Jun 2021 15:34:18 -0300
Subject: [PATCH 3/6] Update sklearn/feature_extraction/text.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/feature_extraction/text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 16a47a79e0426..2ceb96c3fc2a0 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1609,7 +1609,7 @@ def transform(self, X, copy=True):
 
     @property
     def idf_(self):
-        """Returns the inverse document frecuency (IDF) vector.
+        """Return the inverse document frequency (IDF) vector.
 
         Returns
         -------

From 54e1fa3ca43c9f51dc50321033d7cb39260ee68d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Moreyra?= <tomas.moreyra@mercadolibre.com>
Date: Sat, 26 Jun 2021 15:34:30 -0300
Subject: [PATCH 4/6] Update sklearn/feature_extraction/text.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/feature_extraction/text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 2ceb96c3fc2a0..5c43821a7288c 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1613,7 +1613,7 @@ def idf_(self):
 
         Returns
         -------
-        idf_ : array of shape (n_features)
+        idf_ : ndarray of shape (n_features,)
             The inverse document frequency (IDF) vector; only defined
             if  ``use_idf`` is True.
         """

From cfa95b4ba026e8997a36e01ebb7d118798ffa37c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Moreyra?= <tomas.moreyra@mercadolibre.com>
Date: Sat, 26 Jun 2021 15:42:10 -0300
Subject: [PATCH 5/6] Update sklearn/feature_extraction/text.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/feature_extraction/text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 5c43821a7288c..25a8ddb0dc6aa 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1615,7 +1615,7 @@ def idf_(self):
         -------
         idf_ : ndarray of shape (n_features,)
             The inverse document frequency (IDF) vector; only defined
-            if  ``use_idf`` is True.
+            if `use_idf` is True.
         """
         # if _idf_diag is not set, this will raise an attribute error,
         # which means hasattr(self, "idf_") is False

From 4cffd6670036fe133c5989d619ab339ad66d9b15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Moreyra?= <tomas.moreyra@mercadolibre.com>
Date: Sat, 26 Jun 2021 15:42:31 -0300
Subject: [PATCH 6/6] Update sklearn/feature_extraction/text.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/feature_extraction/text.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 25a8ddb0dc6aa..63013d05a2f9d 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1532,7 +1532,7 @@ def fit(self, X, y=None):
             A matrix of term/token counts.
 
         y : None
-            This parameter is not needed to compute tfidf.
+            This parameter is not needed to compute tf-idf.
 
         Returns
         -------