diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
index 20e8f16b0d9d3..f34bbd10cbe55 100644
--- a/examples/text/document_classification_20newsgroups.py
+++ b/examples/text/document_classification_20newsgroups.py
@@ -34,6 +34,7 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.feature_extraction.text import HashingVectorizer
+from sklearn.feature_selection import SelectFromModel
 from sklearn.feature_selection import SelectKBest, chi2
 from sklearn.linear_model import RidgeClassifier
 from sklearn.pipeline import Pipeline
@@ -85,7 +86,7 @@
 
 
 def is_interactive():
-    return not hasattr(sys.modules['__main__ '], '__file__')
+    return not hasattr(sys.modules['__main__'], '__file__')
 
 # work-around for Jupyter notebook and IPython console
 argv = [] if is_interactive() else sys.argv[1:]
@@ -259,8 +260,8 @@ def benchmark(clf):
     print('=' * 80)
     print("%s penalty" % penalty.upper())
     # Train Liblinear model
-    results.append(benchmark(LinearSVC(loss='l2', penalty=penalty,
-                                       dual=False, tol=1e-3)))
+    results.append(benchmark(LinearSVC(penalty=penalty, dual=False,
+                                       tol=1e-3)))
 
     # Train SGD model
     results.append(benchmark(SGDClassifier(alpha=.0001, n_iter=50,
@@ -288,9 +289,9 @@ def benchmark(clf):
 # The smaller C, the stronger the regularization.
 # The more regularization, the more sparsity.
 results.append(benchmark(Pipeline([
-  ('feature_selection', LinearSVC(penalty="l1", dual=False, tol=1e-3)),
-  ('classification', LinearSVC())
-])))
+  ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False,
+                                                  tol=1e-3))),
+  ('classification', LinearSVC(penalty="l2"))])))
 
 # make some plots
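
Note: the last hunk swaps the deprecated pattern of using a fitted LinearSVC directly as a Pipeline transformer for the explicit SelectFromModel meta-transformer. A minimal self-contained sketch of the resulting pipeline follows; the two-category subset and the vectorizer settings are illustrative choices, not taken from the example script.

# Sketch of the L1-based feature selection pattern introduced above:
# SelectFromModel wraps a sparse LinearSVC, keeping only features with
# non-zero coefficients, then an L2-penalized LinearSVC classifies.
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

# Illustrative two-category subset to keep the sketch fast.
data = fetch_20newsgroups(subset='train',
                          categories=['sci.space', 'rec.autos'])
X = TfidfVectorizer(sublinear_tf=True, max_df=0.5).fit_transform(data.data)

clf = Pipeline([
    # The L1 penalty drives uninformative coefficients to exactly zero.
    ('feature_selection', SelectFromModel(LinearSVC(penalty="l1",
                                                    dual=False, tol=1e-3))),
    # Final classifier trained on the reduced feature set.
    ('classification', LinearSVC(penalty="l2")),
])
clf.fit(X, data.target)

# How many features survived the L1 selection step:
print(clf.named_steps['feature_selection'].get_support().sum())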