From 91ba96d7296d660f62317f722de9f865a009aa01 Mon Sep 17 00:00:00 2001 From: Kelly RM Date: Sat, 4 Mar 2017 13:18:56 -0500 Subject: [PATCH 1/2] attribute documentation --- sklearn/feature_extraction/text.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index f5b548a5278cd..e8088c8f162ae 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -189,7 +189,7 @@ def build_preprocessor(self): # hundreds of nanoseconds which is negligible when compared to the # cost of tokenizing a string of 1000 chars for instance. noop = lambda x: x - + # accent stripping if not self.strip_accents: strip_accents = noop @@ -996,6 +996,12 @@ class TfidfTransformer(BaseEstimator, TransformerMixin): sublinear_tf : boolean, default=False Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf). + Attributes + ---------- + idf_ : numpy array of shape [n_features,1] + returns None unless use_idf=True, then + returns 1-D matrix containing idf(d,t). + References ---------- @@ -1035,9 +1041,8 @@ def fit(self, X, y=None): # log+1 instead of log makes sure terms with zero idf don't get # suppressed entirely. idf = np.log(float(n_samples) / df) + 1.0 - self._idf_diag = sp.spdiags(idf, diags=0, m=n_features, + self._idf_diag = sp.spdiags(idf, diags=0, m=n_features, n=n_features, format='csr') - return self def transform(self, X, copy=True): From 864993a6d43c1297d6877121a9b238839abf834b Mon Sep 17 00:00:00 2001 From: "kellyrm945@gmail.com" Date: Sat, 4 Mar 2017 14:38:07 -0500 Subject: [PATCH 2/2] attribute documentation --- sklearn/feature_extraction/text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index e8088c8f162ae..2addb67560313 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -998,7 +998,7 @@ class TfidfTransformer(BaseEstimator, TransformerMixin): Attributes ---------- - idf_ : numpy array of shape [n_features,1] + idf_ : numpy array of shape [n_features, 1] returns None unless use_idf=True, then returns 1-D matrix containing idf(d,t).