From d189026b02f6fdbb7a66c59df7832b9d1fd793d2 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 12:09:24 +0200 Subject: [PATCH 01/19] Be more specific about logistic regression solver in examples --- .../calibration/plot_compare_calibration.py | 2 +- .../plot_classification_probability.py | 53 ++++++++++++------- .../plot_column_transformer_mixed_types.py | 2 +- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/examples/calibration/plot_compare_calibration.py b/examples/calibration/plot_compare_calibration.py index 2d9d0af0dcbc5..15dd0e57a3021 100644 --- a/examples/calibration/plot_compare_calibration.py +++ b/examples/calibration/plot_compare_calibration.py @@ -75,7 +75,7 @@ y_test = y[train_samples:] # Create classifiers -lr = LogisticRegression() +lr = LogisticRegression(solver='lbfgs') gnb = GaussianNB() svc = LinearSVC(C=1.0) rfc = RandomForestClassifier(n_estimators=100) diff --git a/examples/classification/plot_classification_probability.py b/examples/classification/plot_classification_probability.py index 4542362817d71..6fb3785cb7ea5 100644 --- a/examples/classification/plot_classification_probability.py +++ b/examples/classification/plot_classification_probability.py @@ -3,13 +3,17 @@ Plot classification probability =============================== -Plot the classification probability for different classifiers. We use a 3 -class dataset, and we classify it with a Support Vector classifier, L1 -and L2 penalized logistic regression with either a One-Vs-Rest or multinomial -setting, and Gaussian process classification. +Plot the classification probability for different classifiers. We use a 3 class +dataset, and we classify it with a Support Vector classifier, L1 and L2 +penalized logistic regression with either a One-Vs-Rest or multinomial setting, +and Gaussian process classification. -The logistic regression is not a multiclass classifier out of the box. As -a result it can identify only the first class. +Linear SVC is not a probabilistic classifier by default but it has a built-in +calibration option enabled in this example (`probability=True`). + +The logistic regression with One-Vs-Rest is not a multiclass classifier out of +the box. As a result it has more trouvle in separating class 2 and 3 than the +other estimators. """ print(__doc__) @@ -19,6 +23,7 @@ class dataset, and we classify it with a Support Vector classifier, L1 import matplotlib.pyplot as plt import numpy as np +from sklearn.metrics import accuracy_score from sklearn.linear_model import LogisticRegression from sklearn.svm import SVC from sklearn.gaussian_process import GaussianProcessClassifier @@ -31,19 +36,27 @@ class dataset, and we classify it with a Support Vector classifier, L1 n_features = X.shape[1] -C = 1.0 +C = 10 kernel = 1.0 * RBF([1.0, 1.0]) # for GPC -# Create different classifiers. The logistic regression cannot do -# multiclass out of the box. -classifiers = {'L1 logistic': LogisticRegression(C=C, penalty='l1'), - 'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2'), - 'Linear SVC': SVC(kernel='linear', C=C, probability=True, - random_state=0), - 'L2 logistic (Multinomial)': LogisticRegression( - C=C, solver='lbfgs', multi_class='multinomial'), - 'GPC': GaussianProcessClassifier(kernel) - } +# Create different classifiers. 
+classifiers = { + 'L1 logistic': LogisticRegression(C=C, penalty='l1', + solver='saga', + multi_class='multinomial', + max_iter=10000), + 'L2 logistic (Multinomial)': LogisticRegression(C=C, penalty='l2', + solver='saga', + multi_class='multinomial', + max_iter=10000), + 'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2', + solver='saga', + multi_class='ovr', + max_iter=10000), + 'Linear SVC': SVC(kernel='linear', C=C, probability=True, + random_state=0), + 'GPC': GaussianProcessClassifier(kernel) +} n_classifiers = len(classifiers) @@ -59,10 +72,10 @@ class dataset, and we classify it with a Support Vector classifier, L1 classifier.fit(X, y) y_pred = classifier.predict(X) - classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100 - print("classif_rate for %s : %f " % (name, classif_rate)) + accuracy = accuracy_score(y, y_pred) + print("Accuracy (train) for %s: %0.1f%% " % (name, accuracy * 100)) - # View probabilities= + # View probabilities: probas = classifier.predict_proba(Xfull) n_classes = np.unique(y_pred).size for k in range(n_classes): diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 73ee27f83a907..1da0c7e0d60e8 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -71,7 +71,7 @@ # Append classifier to preprocessing pipeline. # Now we have a full prediction pipeline. clf = Pipeline(steps=[('preprocessor', preprocessor), - ('classifier', LogisticRegression())]) + ('classifier', LogisticRegression(solver='lbfgs'))]) X = data.drop('survived', axis=1) y = data['survived'] From 4b7483fc15f813acd449ab5099082b1213d470d4 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 15:35:50 +0200 Subject: [PATCH 02/19] Use early stopped SGD (faster) and plot cross-validated error for best models --- examples/compose/plot_digits_pipe.py | 57 ++++++++++++++++++---------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py index 2352abba4584e..d1758c168c511 100644 --- a/examples/compose/plot_digits_pipe.py +++ b/examples/compose/plot_digits_pipe.py @@ -22,42 +22,57 @@ import numpy as np import matplotlib.pyplot as plt +import pandas as pd -from sklearn import linear_model, decomposition, datasets +from sklearn import datasets +from sklearn.decomposition import PCA +from sklearn.linear_model import SGDClassifier from sklearn.pipeline import Pipeline from sklearn.model_selection import GridSearchCV -logistic = linear_model.LogisticRegression() -pca = decomposition.PCA() +# Define a pipeline to search for the best combination of PCA truncation +# and classifier regularization. 
+logistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,
+                         max_iter=10000, tol=1e-5, random_state=0)
+pca = PCA()
 pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

 digits = datasets.load_digits()
 X_digits = digits.data
 y_digits = digits.target

+# Parameters of pipelines can be set using ‘__’ separated parameter names:
+param_grid = {
+    'pca__n_components': [5, 20, 30, 40, 50, 64],
+    'logistic__alpha': np.logspace(-4, 4, 5),
+}
+search = GridSearchCV(pipe, param_grid, iid=False, cv=5,
+                      return_train_score=False)
+search.fit(X_digits, y_digits)
+print("Best parameter (CV score=%0.3f):" % search.best_score_)
+print(search.best_params_)
+
 # Plot the PCA spectrum
 pca.fit(X_digits)

-plt.figure(1, figsize=(4, 3))
-plt.clf()
-plt.axes([.2, .2, .7, .7])
-plt.plot(pca.explained_variance_, linewidth=2)
-plt.axis('tight')
-plt.xlabel('n_components')
-plt.ylabel('explained_variance_')
+fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(4, 5))
+ax0.plot(pca.explained_variance_ratio_, linewidth=2)
+ax0.set_ylabel('PCA explained variance')
+
+ax0.axvline(search.best_estimator_.named_steps['pca'].n_components,
+            linestyle=':', label='n_components chosen')
+ax0.legend(prop=dict(size=12))

-# Prediction
-n_components = [20, 40, 64]
-Cs = np.logspace(-4, 4, 3)
+# For each number of components, find the best classifier results
+results = pd.DataFrame(search.cv_results_)
+components_col = 'param_pca__n_components'
+best_clfs = results.groupby(components_col).apply(
+    lambda g: g.nlargest(1, 'mean_test_score'))

-# Parameters of pipelines can be set using ‘__’ separated parameter names:
-estimator = GridSearchCV(pipe,
-                         dict(pca__n_components=n_components,
-                              logistic__C=Cs), cv=5)
-estimator.fit(X_digits, y_digits)
+best_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',
+               legend=False, ax=ax1)
+ax1.set_ylabel('Classification accuracy (val)')
+ax1.set_xlabel('n_components')
-plt.axvline(estimator.best_estimator_.named_steps['pca'].n_components,
-            linestyle=':', label='n_components chosen')
-plt.legend(prop=dict(size=12))

 plt.show()

From 447d6f2af50a2ceafd3618a4d9d0d77c943359ca Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Wed, 19 Sep 2018 16:28:38 +0200
Subject: [PATCH 03/19] Fix LR solver in examples/ensemble/plot_voting_probas.py

---
 examples/ensemble/plot_feature_transformation.py | 15 +++++++--------
 examples/ensemble/plot_voting_probas.py          |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py
index 5dbc2754b3a35..085309ed2a942 100644
--- a/examples/ensemble/plot_feature_transformation.py
+++ b/examples/ensemble/plot_feature_transformation.py
@@ -42,19 +42,19 @@
 n_estimator = 10
 X, y = make_classification(n_samples=80000)
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
+
 # It is important to train the ensemble of trees on a different subset
 # of the training data than the linear regression model to avoid
 # overfitting, in particular if the total number of leaves is
 # similar to the number of training samples
-X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train,
-                                                            y_train,
-                                                            test_size=0.5)
+X_train, X_train_lr, y_train, y_train_lr = train_test_split(
+    X_train, y_train, test_size=0.5)

 # Unsupervised transformation based on totally random trees
 rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator,
                           random_state=0)
-rt_lm = LogisticRegression()
+rt_lm =
LogisticRegression(solver='lbfgs', max_iter=1000) pipeline = make_pipeline(rt, rt_lm) pipeline.fit(X_train, y_train) y_pred_rt = pipeline.predict_proba(X_test)[:, 1] @@ -63,7 +63,7 @@ # Supervised transformation based on random forests rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator) rf_enc = OneHotEncoder(categories='auto') -rf_lm = LogisticRegression() +rf_lm = LogisticRegression(solver='lbfgs', max_iter=1000) rf.fit(X_train, y_train) rf_enc.fit(rf.apply(X_train)) rf_lm.fit(rf_enc.transform(rf.apply(X_train_lr)), y_train_lr) @@ -71,9 +71,10 @@ y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(X_test)))[:, 1] fpr_rf_lm, tpr_rf_lm, _ = roc_curve(y_test, y_pred_rf_lm) +# Supervised transformation based on gradient boosted trees grd = GradientBoostingClassifier(n_estimators=n_estimator) grd_enc = OneHotEncoder(categories='auto') -grd_lm = LogisticRegression() +grd_lm = LogisticRegression(solver='lbfgs', max_iter=1000) grd.fit(X_train, y_train) grd_enc.fit(grd.apply(X_train)[:, :, 0]) grd_lm.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr) @@ -82,12 +83,10 @@ grd_enc.transform(grd.apply(X_test)[:, :, 0]))[:, 1] fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred_grd_lm) - # The gradient boosted model by itself y_pred_grd = grd.predict_proba(X_test)[:, 1] fpr_grd, tpr_grd, _ = roc_curve(y_test, y_pred_grd) - # The random forest model by itself y_pred_rf = rf.predict_proba(X_test)[:, 1] fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf) diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py index c729818620a60..4916a00511702 100644 --- a/examples/ensemble/plot_voting_probas.py +++ b/examples/ensemble/plot_voting_probas.py @@ -29,7 +29,7 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble import VotingClassifier -clf1 = LogisticRegression(random_state=123) +clf1 = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=123) clf2 = RandomForestClassifier(n_estimators=100, random_state=123) clf3 = GaussianNB() X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) From c54c34263cb9511bbfae5b4b0ed6d6e281ae2f2b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 16:33:28 +0200 Subject: [PATCH 04/19] Fix LR solver & scale data in plot_digits_classification_exercise.py --- examples/exercises/plot_digits_classification_exercise.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/exercises/plot_digits_classification_exercise.py b/examples/exercises/plot_digits_classification_exercise.py index 25ab7e71c5925..6651a1fa05783 100644 --- a/examples/exercises/plot_digits_classification_exercise.py +++ b/examples/exercises/plot_digits_classification_exercise.py @@ -15,7 +15,7 @@ from sklearn import datasets, neighbors, linear_model digits = datasets.load_digits() -X_digits = digits.data +X_digits = digits.data / digits.data.max() y_digits = digits.target n_samples = len(X_digits) @@ -26,7 +26,8 @@ y_test = y_digits[int(.9 * n_samples):] knn = neighbors.KNeighborsClassifier() -logistic = linear_model.LogisticRegression() +logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=1000, + multi_class='multinomial') print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test)) print('LogisticRegression score: %f' From c4bba067f4fd982f7bbd144ce253435b5c9684ab Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 16:37:43 +0200 Subject: [PATCH 05/19] Use saga solver in plot_logistic_l1_l2_sparsity.py --- 
examples/linear_model/plot_logistic_l1_l2_sparsity.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/linear_model/plot_logistic_l1_l2_sparsity.py b/examples/linear_model/plot_logistic_l1_l2_sparsity.py index be63b144c260a..bffc648965fca 100644 --- a/examples/linear_model/plot_logistic_l1_l2_sparsity.py +++ b/examples/linear_model/plot_logistic_l1_l2_sparsity.py @@ -37,10 +37,10 @@ # Set regularization parameter -for i, C in enumerate((100, 1, 0.01)): +for i, C in enumerate((1, 0.1, 0.01)): # turn down tolerance for short training time - clf_l1_LR = LogisticRegression(C=C, penalty='l1', tol=0.01) - clf_l2_LR = LogisticRegression(C=C, penalty='l2', tol=0.01) + clf_l1_LR = LogisticRegression(C=C, penalty='l1', tol=0.01, solver='saga') + clf_l2_LR = LogisticRegression(C=C, penalty='l2', tol=0.01, solver='saga') clf_l1_LR.fit(X, y) clf_l2_LR.fit(X, y) From dfb94e2b932c93d531ca2215f9b1c3a66386294a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 17:16:04 +0200 Subject: [PATCH 06/19] Use LBFGS solver in plot_iris_logistic.py --- examples/linear_model/plot_iris_logistic.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py index d2193e9907b56..d3790370bbde5 100644 --- a/examples/linear_model/plot_iris_logistic.py +++ b/examples/linear_model/plot_iris_logistic.py @@ -7,29 +7,29 @@ ========================================================= Show below is a logistic-regression classifiers decision boundaries on the +first two dimensions (sepal length and width) of the iris data: `iris `_ dataset. The datapoints are colored according to their labels. """ print(__doc__) - # Code source: Gaël Varoquaux # Modified for documentation by Jaques Grobler # License: BSD 3 clause import numpy as np import matplotlib.pyplot as plt -from sklearn import linear_model, datasets +from sklearn.linear_model import LogisticRegression +from sklearn import datasets # import some data to play with iris = datasets.load_iris() X = iris.data[:, :2] # we only take the first two features. +X /= X.max() # rescale to [0-1] range to speed-up convergence Y = iris.target -h = .02 # step size in the mesh - -logreg = linear_model.LogisticRegression(C=1e5) +logreg = LogisticRegression(C=1e5, solver='lbfgs') # we create an instance of Neighbours Classifier and fit the data. logreg.fit(X, Y) @@ -38,6 +38,7 @@ # point in the mesh [x_min, x_max]x[y_min, y_max]. 
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 +h = .02 # step size in the mesh xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()]) From c856d9ed847921e54e6c4cbcd18a41a53c99bda1 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 17:19:17 +0200 Subject: [PATCH 07/19] Use LBFGS in plot_logistic.py --- examples/linear_model/plot_logistic.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py index 488f1c3543a6a..171a7135844be 100644 --- a/examples/linear_model/plot_logistic.py +++ b/examples/linear_model/plot_logistic.py @@ -23,8 +23,7 @@ from sklearn import linear_model -# this is our test set, it's just a straight line with some -# Gaussian noise +# General a toy dataset:s it's just a straight line with some Gaussian noise: xmin, xmax = -5, 5 n_samples = 100 np.random.seed(0) @@ -34,8 +33,9 @@ X += .3 * np.random.normal(size=n_samples) X = X[:, np.newaxis] -# run the classifier -clf = linear_model.LogisticRegression(C=1e5) + +# Fit the classifier +clf = linear_model.LogisticRegression(C=1e5, solver='lbfgs') clf.fit(X, y) # and plot the result @@ -47,6 +47,8 @@ def model(x): return 1 / (1 + np.exp(-x)) + + loss = model(X_test * clf.coef_ + clf.intercept_).ravel() plt.plot(X_test, loss, color='red', linewidth=3) From 03102e0fe6fa26295cfd8064070dd25eff124528 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 17:52:53 +0200 Subject: [PATCH 08/19] Use SAGA solver for Logistic Regression Path example --- examples/linear_model/plot_logistic_path.py | 42 +++++++++++++++------ 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/examples/linear_model/plot_logistic_path.py b/examples/linear_model/plot_logistic_path.py index 66a1ab9bd0254..c9bbf8720545e 100644 --- a/examples/linear_model/plot_logistic_path.py +++ b/examples/linear_model/plot_logistic_path.py @@ -1,10 +1,28 @@ #!/usr/bin/env python """ -================================= -Path with L1- Logistic Regression -================================= +============================================== +Regularization path of L1- Logistic Regression +============================================== -Computes path on IRIS dataset. + +Train l1-penalized logistic regression models on binary classification problem +derived from the Iris dataset. + +The models are ordered from strongest regularized to least regularized. The 4 +coefficients of the models are collected and plotted as a "regularization +path": on the left-hand side of the figure (strong regularizers), all the +coefficients are exactly 0. When regularization gets progressively looser, +coefficients can get non-zero values one after the other. + +Here we choose the SAGA solver because it can efficiently optimize for the +Logistic Regression loss with a non-smooth, sparsity inducing l1 penalty. + +Also note that we set a low value for the tolerance to make sure that the +model has converged before collecting the coefficients. + +We also use warm_start=True which mean that the coefficients of the models +are reused to initialize the next model fit and therefore speed-up the +computation of the full-path. 
""" print(__doc__) @@ -12,7 +30,7 @@ # Author: Alexandre Gramfort # License: BSD 3 clause -from datetime import datetime +from time import time import numpy as np import matplotlib.pyplot as plt @@ -27,26 +45,28 @@ X = X[y != 2] y = y[y != 2] -X -= np.mean(X, 0) +X /= X.max() # Normalize X to speed-up convergence # ############################################################################# # Demo path functions -cs = l1_min_c(X, y, loss='log') * np.logspace(0, 3) +cs = l1_min_c(X, y, loss='log') * np.logspace(0, 7, 16) print("Computing regularization path ...") -start = datetime.now() -clf = linear_model.LogisticRegression(C=1.0, penalty='l1', tol=1e-6) +start = time() +clf = linear_model.LogisticRegression(penalty='l1', solver='saga', + tol=1e-6, max_iter=int(1e6), + warm_start=True) coefs_ = [] for c in cs: clf.set_params(C=c) clf.fit(X, y) coefs_.append(clf.coef_.ravel().copy()) -print("This took ", datetime.now() - start) +print("This took %0.3fs" % (time() - start)) coefs_ = np.array(coefs_) -plt.plot(np.log10(cs), coefs_) +plt.plot(np.log10(cs), coefs_, marker='o') ymin, ymax = plt.ylim() plt.xlabel('log(C)') plt.ylabel('Coefficients') From 3509ac1a99c5483d22e719ccf4ab0229c8d79e96 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 18:01:41 +0200 Subject: [PATCH 09/19] Use LBFGS solver in plot_classifier_chain_yeast.py --- examples/multioutput/plot_classifier_chain_yeast.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py index cb3a5085e316d..afe0131926dea 100644 --- a/examples/multioutput/plot_classifier_chain_yeast.py +++ b/examples/multioutput/plot_classifier_chain_yeast.py @@ -54,14 +54,15 @@ # Fit an independent logistic regression model for each class using the # OneVsRestClassifier wrapper. -ovr = OneVsRestClassifier(LogisticRegression()) +base_lr = LogisticRegression(solver='lbfgs') +ovr = OneVsRestClassifier(base_lr) ovr.fit(X_train, Y_train) Y_pred_ovr = ovr.predict(X_test) ovr_jaccard_score = jaccard_similarity_score(Y_test, Y_pred_ovr) # Fit an ensemble of logistic regression classifier chains and take the # take the average prediction of all the chains. 
-chains = [ClassifierChain(LogisticRegression(), order='random', random_state=i) +chains = [ClassifierChain(base_lr, order='random', random_state=i) for i in range(10)] for chain in chains: chain.fit(X_train, Y_train) From ec2918e0def0fb0db73d8b8a7b73d8bc14d51782 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 18:36:06 +0200 Subject: [PATCH 10/19] Use LBFGS solver in plot_rbm_logistic_classification.py --- .../plot_rbm_logistic_classification.py | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py index aa75ccc06d1f1..26223ad245214 100644 --- a/examples/neural_networks/plot_rbm_logistic_classification.py +++ b/examples/neural_networks/plot_rbm_logistic_classification.py @@ -40,6 +40,7 @@ from sklearn.model_selection import train_test_split from sklearn.neural_network import BernoulliRBM from sklearn.pipeline import Pipeline +from sklearn.base import clone # ############################################################################# @@ -67,29 +68,32 @@ def nudge_dataset(X, Y): [0, 0, 0], [0, 1, 0]]] - shift = lambda x, w: convolve(x.reshape((8, 8)), mode='constant', - weights=w).ravel() + def shift(x, w): + return convolve(x.reshape((8, 8)), mode='constant', weights=w).ravel() + X = np.concatenate([X] + [np.apply_along_axis(shift, 1, X, vector) for vector in direction_vectors]) Y = np.concatenate([Y for _ in range(5)], axis=0) return X, Y + # Load Data digits = datasets.load_digits() X = np.asarray(digits.data, 'float32') X, Y = nudge_dataset(X, digits.target) X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001) # 0-1 scaling -X_train, X_test, Y_train, Y_test = train_test_split(X, Y, - test_size=0.2, - random_state=0) +X_train, X_test, Y_train, Y_test = train_test_split( + X, Y, test_size=0.2, random_state=0) # Models we will use -logistic = linear_model.LogisticRegression() +logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=10000, + multi_class='multinomial') rbm = BernoulliRBM(random_state=0, verbose=True) -classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)]) +rbm_features_classifier = Pipeline( + steps=[('rbm', rbm), ('logistic', logistic)]) # ############################################################################# # Training @@ -102,28 +106,26 @@ def nudge_dataset(X, Y): # More components tend to give better prediction performance, but larger # fitting time rbm.n_components = 100 -logistic.C = 6000.0 +logistic.C = 6000 # Training RBM-Logistic Pipeline -classifier.fit(X_train, Y_train) +rbm_features_classifier.fit(X_train, Y_train) -# Training Logistic regression -logistic_classifier = linear_model.LogisticRegression(C=100.0) -logistic_classifier.fit(X_train, Y_train) +# Training the Logistic regression classifier directly on the pixel +raw_pixel_classifier = clone(logistic) +raw_pixel_classifier.C = 100. 
+raw_pixel_classifier.fit(X_train, Y_train) # ############################################################################# # Evaluation -print() +Y_pred = rbm_features_classifier.predict(X_test) print("Logistic regression using RBM features:\n%s\n" % ( - metrics.classification_report( - Y_test, - classifier.predict(X_test)))) + metrics.classification_report(Y_test, Y_pred))) +Y_pred = raw_pixel_classifier.predict(X_test) print("Logistic regression using raw pixel features:\n%s\n" % ( - metrics.classification_report( - Y_test, - logistic_classifier.predict(X_test)))) + metrics.classification_report(Y_test, Y_pred))) # ############################################################################# # Plotting From 30b56e5988b73d8d8c3cc8bfa0261786c8b4d875 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 18:43:14 +0200 Subject: [PATCH 11/19] typo --- examples/classification/plot_classification_probability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/classification/plot_classification_probability.py b/examples/classification/plot_classification_probability.py index 6fb3785cb7ea5..ea4df9e6fb583 100644 --- a/examples/classification/plot_classification_probability.py +++ b/examples/classification/plot_classification_probability.py @@ -12,7 +12,7 @@ calibration option enabled in this example (`probability=True`). The logistic regression with One-Vs-Rest is not a multiclass classifier out of -the box. As a result it has more trouvle in separating class 2 and 3 than the +the box. As a result it has more trouble in separating class 2 and 3 than the other estimators. """ print(__doc__) From e76d5fc16fa6fe528fadfc375414e2eaf63c23a9 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 19 Sep 2018 18:47:29 +0200 Subject: [PATCH 12/19] typo --- examples/linear_model/plot_logistic_path.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/linear_model/plot_logistic_path.py b/examples/linear_model/plot_logistic_path.py index c9bbf8720545e..79b5522575eb0 100644 --- a/examples/linear_model/plot_logistic_path.py +++ b/examples/linear_model/plot_logistic_path.py @@ -5,8 +5,8 @@ ============================================== -Train l1-penalized logistic regression models on binary classification problem -derived from the Iris dataset. +Train l1-penalized logistic regression models on a binary classification +problem derived from the Iris dataset. The models are ordered from strongest regularized to least regularized. The 4 coefficients of the models are collected and plotted as a "regularization @@ -17,12 +17,12 @@ Here we choose the SAGA solver because it can efficiently optimize for the Logistic Regression loss with a non-smooth, sparsity inducing l1 penalty. -Also note that we set a low value for the tolerance to make sure that the -model has converged before collecting the coefficients. +Also note that we set a low value for the tolerance to make sure that the model +has converged before collecting the coefficients. -We also use warm_start=True which mean that the coefficients of the models -are reused to initialize the next model fit and therefore speed-up the -computation of the full-path. +We also use warm_start=True which means that the coefficients of the models are +reused to initialize the next model fit to speed-up the computation of the +full-path. 
""" print(__doc__) From 765826809f57f33d23c10ca7830cc615953ec3aa Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 20 Sep 2018 10:27:25 +0200 Subject: [PATCH 13/19] Bump up pandas dependency to 0.17.1 --- .circleci/config.yml | 2 +- README.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e1e410c440314..d627636a35279 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -47,7 +47,7 @@ jobs: - SCIPY_VERSION: 0.14 - MATPLOTLIB_VERSION: 1.3 - SCIKIT_IMAGE_VERSION: 0.9.3 - - PANDAS_VERSION: 0.13.1 + - PANDAS_VERSION: 0.17.1 steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh diff --git a/README.rst b/README.rst index fa2ef793b9e26..b967a07ccc0c9 100644 --- a/README.rst +++ b/README.rst @@ -57,7 +57,7 @@ scikit-learn requires: Scikit-learn 0.21 and later will require Python 3.5 or newer. For running the examples Matplotlib >= 1.3.1 is required. A few examples -require scikit-image >= 0.9.3 and a few examples require pandas >= 0.13.1. +require scikit-image >= 0.9.3 and a few examples require pandas >= 0.17.1. scikit-learn also uses CBLAS, the C interface to the Basic Linear Algebra Subprograms library. scikit-learn comes with a reference implementation, but From 080160620ef45d27d991e2f986a02d6c513a184a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 20 Sep 2018 11:31:53 +0200 Subject: [PATCH 14/19] Bump up examples minimal deps to match pandas 0.17.1 --- .circleci/config.yml | 9 ++++----- README.rst | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d627636a35279..6e9e75298e172 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -42,11 +42,10 @@ jobs: - MINICONDA_PATH: ~/miniconda - CONDA_ENV_NAME: testenv - PYTHON_VERSION: 2 - - NUMPY_VERSION: 1.8.2 - # XXX: plot_gpc_xor.py fails with scipy 0.13.3 - - SCIPY_VERSION: 0.14 - - MATPLOTLIB_VERSION: 1.3 - - SCIKIT_IMAGE_VERSION: 0.9.3 + - NUMPY_VERSION: 1.10 + - SCIPY_VERSION: 0.16 + - MATPLOTLIB_VERSION: 1.4 + - SCIKIT_IMAGE_VERSION: 0.11 - PANDAS_VERSION: 0.17.1 steps: - checkout diff --git a/README.rst b/README.rst index b967a07ccc0c9..b4d67af56eec8 100644 --- a/README.rst +++ b/README.rst @@ -56,8 +56,8 @@ scikit-learn requires: **Scikit-learn 0.20 is the last version to support Python2.7.** Scikit-learn 0.21 and later will require Python 3.5 or newer. -For running the examples Matplotlib >= 1.3.1 is required. A few examples -require scikit-image >= 0.9.3 and a few examples require pandas >= 0.17.1. +For running the examples Matplotlib >= 1.4 is required. A few examples +require scikit-image >= 0.11.3 and a few examples require pandas >= 0.17.1. scikit-learn also uses CBLAS, the C interface to the Basic Linear Algebra Subprograms library. 
scikit-learn comes with a reference implementation, but From 8134fa8d12201b16e630bd9db46c313af84141bb Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 20 Sep 2018 11:48:20 +0200 Subject: [PATCH 15/19] Fix figure layout for plot_digits_pipe.py --- examples/compose/plot_digits_pipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py index d1758c168c511..6e722c9861529 100644 --- a/examples/compose/plot_digits_pipe.py +++ b/examples/compose/plot_digits_pipe.py @@ -56,7 +56,7 @@ # Plot the PCA spectrum pca.fit(X_digits) -fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(4, 5)) +fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6)) ax0.plot(pca.explained_variance_ratio_, linewidth=2) ax0.set_ylabel('PCA explained variance') @@ -75,4 +75,5 @@ ax1.set_ylabel('Classification accuracy (val)') ax1.set_xlabel('n_components') +plt.tight_layout() plt.show() From 2539fc5537f4724e3d962ff4a46c64fa422eacd0 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 20 Sep 2018 11:49:04 +0200 Subject: [PATCH 16/19] Version numbers are not decimal numbers --- .circleci/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6e9e75298e172..0e77f30d18ed7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -41,12 +41,12 @@ jobs: # Test examples run with minimal dependencies - MINICONDA_PATH: ~/miniconda - CONDA_ENV_NAME: testenv - - PYTHON_VERSION: 2 - - NUMPY_VERSION: 1.10 - - SCIPY_VERSION: 0.16 - - MATPLOTLIB_VERSION: 1.4 - - SCIKIT_IMAGE_VERSION: 0.11 - - PANDAS_VERSION: 0.17.1 + - PYTHON_VERSION: "2" + - NUMPY_VERSION: "1.10" + - SCIPY_VERSION: "0.16" + - MATPLOTLIB_VERSION: "1.4" + - SCIKIT_IMAGE_VERSION: "0.11" + - PANDAS_VERSION: "0.17.1" steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh From c7a5c05fc70e1dea1479efdc1cd0b323e10d4153 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 20 Sep 2018 11:55:12 +0200 Subject: [PATCH 17/19] Set multinomial, no scaling to keep example simple, fix formatting of example doc --- examples/linear_model/plot_iris_logistic.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py index d3790370bbde5..968598392722d 100644 --- a/examples/linear_model/plot_iris_logistic.py +++ b/examples/linear_model/plot_iris_logistic.py @@ -7,9 +7,9 @@ ========================================================= Show below is a logistic-regression classifiers decision boundaries on the -first two dimensions (sepal length and width) of the iris data: -`iris `_ dataset. The -datapoints are colored according to their labels. +first two dimensions (sepal length and width) of the `iris +`_ dataset. The datapoints +are colored according to their labels. """ print(__doc__) @@ -26,10 +26,9 @@ # import some data to play with iris = datasets.load_iris() X = iris.data[:, :2] # we only take the first two features. -X /= X.max() # rescale to [0-1] range to speed-up convergence Y = iris.target -logreg = LogisticRegression(C=1e5, solver='lbfgs') +logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial') # we create an instance of Neighbours Classifier and fit the data. 
 logreg.fit(X, Y)

From 2bbc6cd5a4bdd20483f52645f2f9c13177fa8061 Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Thu, 20 Sep 2018 13:28:54 +0200
Subject: [PATCH 18/19] Missing plt.tight_layout() in plot_voting_probas.py

---
 examples/ensemble/plot_voting_probas.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py
index 4916a00511702..e38a618da3782 100644
--- a/examples/ensemble/plot_voting_probas.py
+++ b/examples/ensemble/plot_voting_probas.py
@@ -79,4 +79,5 @@
 plt.ylim([0, 1])
 plt.title('Class probabilities for sample 1 by different classifiers')
 plt.legend([p1[0], p2[0]], ['class 1', 'class 2'], loc='upper left')
+plt.tight_layout()
 plt.show()

From 79a97f94f8f58eb3313b52d05bfc0b634c14aa95 Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Thu, 20 Sep 2018 13:33:12 +0200
Subject: [PATCH 19/19] Missing plt.tight_layout() in plot_logistic.py

---
 examples/linear_model/plot_logistic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py
index 171a7135844be..6d94cb0548601 100644
--- a/examples/linear_model/plot_logistic.py
+++ b/examples/linear_model/plot_logistic.py
@@ -65,4 +65,5 @@ def model(x):
 plt.xlim(-4, 10)
 plt.legend(('Logistic Regression Model', 'Linear Regression Model'),
            loc="lower right", fontsize='small')
+plt.tight_layout()
 plt.show()
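
Note (not part of the patch series): the snippet below is a minimal, standalone sketch of the pattern the patches above apply, namely passing an explicit solver (and, for multiclass problems, an explicit multi_class) to LogisticRegression rather than relying on the library default. In scikit-learn 0.20 the default solver is still 'liblinear' and omitting the argument triggers a FutureWarning, since the default changes to 'lbfgs' in 0.22; spelling it out keeps the examples quiet and stable across releases. The dataset and parameter values used here are illustrative assumptions, not taken from any of the patched examples.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Load a small multiclass dataset (an illustrative choice, not one of the
# datasets used in the patches above).
X, y = load_iris(return_X_y=True)

# Spell out the solver instead of relying on the default: lbfgs handles the
# smooth l2 penalty and the multinomial loss, while 'saga' would be the choice
# for an l1 penalty as in plot_logistic_l1_l2_sparsity.py.
clf = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                         max_iter=1000)
clf.fit(X, y)
print("Training accuracy: %0.3f" % clf.score(X, y))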