scikit-learn · jnothman · Dec 13, 2017 · Dec 13, 2017 · Dec 13, 2017
diff --git a/examples/text/document_classification_20newsgroups.py → examples/text/plot_document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py → examples/text/plot_document_classification_20newsgroups.py
@@ -88,6 +88,7 @@
 def is_interactive():
     return not hasattr(sys.modules['__main__'], '__file__')
 
+
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]
 (opts, args) = op.parse_args(argv)
@@ -136,6 +137,7 @@ def is_interactive():
 def size_mb(docs):
     return sum(len(s.encode('utf-8')) for s in docs) / 1e6
 
+
 data_train_size_mb = size_mb(data_train.data)
 data_test_size_mb = size_mb(data_test.data)
 

diff --git a/examples/text/document_clustering.py → examples/text/plot_document_clustering.py b/examples/text/document_clustering.py → examples/text/plot_document_clustering.py
@@ -27,8 +27,8 @@
 Two algorithms are demoed: ordinary k-means and its more scalable cousin
 minibatch k-means.
 
-Additionally, latent semantic analysis can also be used to reduce dimensionality
-and discover latent patterns in the data.
+Additionally, latent semantic analysis can be used to reduce
+dimensionality and discover latent patterns in the data.
 
 It can be noted that k-means (and minibatch k-means) are very sensitive to
 feature scaling and that in this case the IDF weighting helps improve the
@@ -106,6 +106,7 @@
 def is_interactive():
     return not hasattr(sys.modules['__main__'], '__file__')
 
+
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]
 (opts, args) = op.parse_args(argv)
@@ -138,7 +139,8 @@ def is_interactive():
 labels = dataset.target
 true_k = np.unique(labels).shape[0]
 
-print("Extracting features from the training dataset using a sparse vectorizer")
+print("Extracting features from the training dataset "
+      "using a sparse vectorizer")
 t0 = time()
 if opts.use_hashing:
     if opts.use_idf:

diff --git a/examples/text/hashing_vs_dict_vectorizer.py → examples/text/plot_hashing_vs_dict_vectorizer.py b/examples/text/hashing_vs_dict_vectorizer.py → examples/text/plot_hashing_vs_dict_vectorizer.py
@@ -62,7 +62,7 @@ def token_freqs(doc):
     'talk.religion.misc',
 ]
 # Uncomment the following line to use a larger set (11k+ documents)
-#categories = None
+# categories = None
 
 print(__doc__)
 print("Usage: %s [n_features_for_hashing]" % sys.argv[0])