[MRG] Fix FutureWarnings in logistic regression examples #12114

Merged: 19 commits, Sep 24, 2018. Changes shown from all commits.
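Background for the diffs below: under scikit-learn 0.20, fitting a LogisticRegression without an explicit `solver` emits a FutureWarning (the default changes from 'liblinear' to 'lbfgs' in 0.22), and fitting on multiclass targets without an explicit `multi_class` emits another (the default changes to 'auto'). The examples are updated to pass these options explicitly. A minimal reproduction sketch, assuming scikit-learn 0.20 (warning behavior summarized, not quoted from the PR):

import warnings
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)

# Fitting with the deprecated defaults emits the FutureWarnings this PR fixes.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", FutureWarning)
    LogisticRegression().fit(X, y)
print([str(w.message) for w in caught])  # mentions the solver and multi_class defaults

# The fix applied throughout the examples: state the options explicitly.
LogisticRegression(solver='lbfgs', multi_class='multinomial').fit(X, y)

The same pattern (an explicit solver, plus max_iter or multi_class where needed) recurs in every example file touched below.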
13 changes: 6 additions & 7 deletions .circleci/config.yml
@@ -41,13 +41,12 @@ jobs:
# Test examples run with minimal dependencies
- MINICONDA_PATH: ~/miniconda
- CONDA_ENV_NAME: testenv
-   - PYTHON_VERSION: 2
-   - NUMPY_VERSION: 1.8.2
-   # XXX: plot_gpc_xor.py fails with scipy 0.13.3
-   - SCIPY_VERSION: 0.14
-   - MATPLOTLIB_VERSION: 1.3
-   - SCIKIT_IMAGE_VERSION: 0.9.3
-   - PANDAS_VERSION: 0.13.1
+   - PYTHON_VERSION: "2"
+   - NUMPY_VERSION: "1.10"
+   - SCIPY_VERSION: "0.16"
+   - MATPLOTLIB_VERSION: "1.4"
+   - SCIKIT_IMAGE_VERSION: "0.11"
+   - PANDAS_VERSION: "0.17.1"
Comment (Member Author):
I want to use DataFrame.nlargest to make it easy to plot the results of a grid search. This causes a general bump up in the dependency versions for the documentation build. But I think this is fine.
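For readers unfamiliar with it: DataFrame.nlargest(n, column) returns the n rows with the largest values in the given column (available since pandas 0.17, hence the bump above). Combined with groupby it yields the best row per group, which is exactly how the grid-search results are plotted later in this PR. A small illustrative sketch with made-up numbers, not data from the PR:

import pandas as pd

# Toy stand-in for GridSearchCV.cv_results_: one row per parameter setting.
results = pd.DataFrame({
    'param_pca__n_components': [5, 5, 20, 20],
    'mean_test_score': [0.81, 0.84, 0.90, 0.88],
})

# Keep only the best-scoring row for each value of n_components.
best = results.groupby('param_pca__n_components').apply(
    lambda g: g.nlargest(1, 'mean_test_score'))
print(best)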

steps:
- checkout
- run: ./build_tools/circle/checkout_merge_commit.sh
4 changes: 2 additions & 2 deletions README.rst
@@ -56,8 +56,8 @@ scikit-learn requires:
**Scikit-learn 0.20 is the last version to support Python2.7.**
Scikit-learn 0.21 and later will require Python 3.5 or newer.

- For running the examples Matplotlib >= 1.3.1 is required. A few examples
- require scikit-image >= 0.9.3 and a few examples require pandas >= 0.13.1.
+ For running the examples Matplotlib >= 1.4 is required. A few examples
+ require scikit-image >= 0.11.3 and a few examples require pandas >= 0.17.1.

scikit-learn also uses CBLAS, the C interface to the Basic Linear Algebra
Subprograms library. scikit-learn comes with a reference implementation, but
2 changes: 1 addition & 1 deletion examples/calibration/plot_compare_calibration.py
@@ -75,7 +75,7 @@
y_test = y[train_samples:]

# Create classifiers
- lr = LogisticRegression()
+ lr = LogisticRegression(solver='lbfgs')
gnb = GaussianNB()
svc = LinearSVC(C=1.0)
rfc = RandomForestClassifier(n_estimators=100)
53 changes: 33 additions & 20 deletions examples/classification/plot_classification_probability.py
@@ -3,13 +3,17 @@
Plot classification probability
===============================

- Plot the classification probability for different classifiers. We use a 3
- class dataset, and we classify it with a Support Vector classifier, L1
- and L2 penalized logistic regression with either a One-Vs-Rest or multinomial
- setting, and Gaussian process classification.
+ Plot the classification probability for different classifiers. We use a 3 class
+ dataset, and we classify it with a Support Vector classifier, L1 and L2
+ penalized logistic regression with either a One-Vs-Rest or multinomial setting,
+ and Gaussian process classification.

- The logistic regression is not a multiclass classifier out of the box. As
- a result it can identify only the first class.
+ Linear SVC is not a probabilistic classifier by default but it has a built-in
+ calibration option enabled in this example (`probability=True`).
+
+ The logistic regression with One-Vs-Rest is not a multiclass classifier out of
+ the box. As a result it has more trouble in separating class 2 and 3 than the
+ other estimators.
"""
print(__doc__)

@@ -19,6 +23,7 @@
import matplotlib.pyplot as plt
import numpy as np

+ from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
@@ -31,19 +36,27 @@

n_features = X.shape[1]

- C = 1.0
+ C = 10
kernel = 1.0 * RBF([1.0, 1.0]) # for GPC

- # Create different classifiers. The logistic regression cannot do
- # multiclass out of the box.
- classifiers = {'L1 logistic': LogisticRegression(C=C, penalty='l1'),
-                'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2'),
-                'Linear SVC': SVC(kernel='linear', C=C, probability=True,
-                                  random_state=0),
-                'L2 logistic (Multinomial)': LogisticRegression(
-                    C=C, solver='lbfgs', multi_class='multinomial'),
-                'GPC': GaussianProcessClassifier(kernel)
-                }
+ # Create different classifiers.
+ classifiers = {
+     'L1 logistic': LogisticRegression(C=C, penalty='l1',
+                                       solver='saga',
+                                       multi_class='multinomial',
+                                       max_iter=10000),
+     'L2 logistic (Multinomial)': LogisticRegression(C=C, penalty='l2',
+                                                     solver='saga',
+                                                     multi_class='multinomial',
+                                                     max_iter=10000),
+     'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2',
+                                             solver='saga',
+                                             multi_class='ovr',
+                                             max_iter=10000),
+     'Linear SVC': SVC(kernel='linear', C=C, probability=True,
+                       random_state=0),
+     'GPC': GaussianProcessClassifier(kernel)
+ }

n_classifiers = len(classifiers)

@@ -59,10 +72,10 @@
classifier.fit(X, y)

y_pred = classifier.predict(X)
- classif_rate = np.mean(y_pred.ravel() == y.ravel()) * 100
- print("classif_rate for %s : %f " % (name, classif_rate))
+ accuracy = accuracy_score(y, y_pred)
+ print("Accuracy (train) for %s: %0.1f%% " % (name, accuracy * 100))

- # View probabilities=
+ # View probabilities:
probas = classifier.predict_proba(Xfull)
n_classes = np.unique(y_pred).size
for k in range(n_classes):
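A note on the solver choice above: 'saga' is used for all three logistic-regression variants because it supports the L1 penalty as well as L2 with the multinomial loss; 'lbfgs' handles L2 only, which is why it cannot replace 'saga' in the L1 case. A quick sanity check, assuming scikit-learn 0.20 behavior:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)

# lbfgs rejects the L1 penalty outright.
try:
    LogisticRegression(penalty='l1', solver='lbfgs').fit(X, y)
except ValueError as exc:
    print(exc)  # solver supports only l2 penalties

# saga accepts both penalties, so one solver serves all three variants above.
LogisticRegression(penalty='l1', solver='saga', max_iter=10000).fit(X, y)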
2 changes: 1 addition & 1 deletion examples/compose/plot_column_transformer_mixed_types.py
@@ -71,7 +71,7 @@
# Append classifier to preprocessing pipeline.
# Now we have a full prediction pipeline.
clf = Pipeline(steps=[('preprocessor', preprocessor),
-                       ('classifier', LogisticRegression())])
+                       ('classifier', LogisticRegression(solver='lbfgs'))])

X = data.drop('survived', axis=1)
y = data['survived']
58 changes: 37 additions & 21 deletions examples/compose/plot_digits_pipe.py
@@ -22,42 +22,58 @@

import numpy as np
import matplotlib.pyplot as plt
+ import pandas as pd

- from sklearn import linear_model, decomposition, datasets
+ from sklearn import datasets
+ from sklearn.decomposition import PCA
+ from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

- logistic = linear_model.LogisticRegression()
-
- pca = decomposition.PCA()
+ # Define a pipeline to search for the best combination of PCA truncation
+ # and classifier regularization.
+ logistic = SGDClassifier(loss='log', penalty='l2', early_stopping=True,
+                          max_iter=10000, tol=1e-5, random_state=0)
Comment (Member Author):
I use SGDClassifier here because it's much faster thanks to early stopping, and we are only interested in generalization (not optimization or coefficient values) in this specific example.
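For context on the comment above: with early_stopping=True, SGDClassifier sets aside validation_fraction of the training data and stops as soon as the validation score has failed to improve by tol for n_iter_no_change consecutive epochs, so max_iter=10000 is a ceiling rather than the expected cost. A small sketch of that behavior, assuming scikit-learn >= 0.20 (where early_stopping was introduced):

from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier

X, y = load_digits(return_X_y=True)

# Training halts well before max_iter once the validation score plateaus.
clf = SGDClassifier(loss='log', penalty='l2', early_stopping=True,
                    validation_fraction=0.1, n_iter_no_change=5,
                    max_iter=10000, tol=1e-5, random_state=0).fit(X, y)
print(clf.n_iter_)  # typically tens of epochs, nowhere near 10000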

+ pca = PCA()
pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

+ # Parameters of pipelines can be set using ‘__’ separated parameter names:
+ param_grid = {
+     'pca__n_components': [5, 20, 30, 40, 50, 64],
+     'logistic__alpha': np.logspace(-4, 4, 5),
+ }
+ search = GridSearchCV(pipe, param_grid, iid=False, cv=5,
+                       return_train_score=False)
+ search.fit(X_digits, y_digits)
+ print("Best parameter (CV score=%0.3f):" % search.best_score_)
+ print(search.best_params_)

# Plot the PCA spectrum
pca.fit(X_digits)

- plt.figure(1, figsize=(4, 3))
- plt.clf()
- plt.axes([.2, .2, .7, .7])
- plt.plot(pca.explained_variance_, linewidth=2)
- plt.axis('tight')
- plt.xlabel('n_components')
- plt.ylabel('explained_variance_')
+ fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))
+ ax0.plot(pca.explained_variance_ratio_, linewidth=2)
+ ax0.set_ylabel('PCA explained variance')
+
+ ax0.axvline(search.best_estimator_.named_steps['pca'].n_components,
+             linestyle=':', label='n_components chosen')
+ ax0.legend(prop=dict(size=12))

- # Prediction
- n_components = [20, 40, 64]
- Cs = np.logspace(-4, 4, 3)
+ # For each number of components, find the best classifier results
+ results = pd.DataFrame(search.cv_results_)
+ components_col = 'param_pca__n_components'
+ best_clfs = results.groupby(components_col).apply(
+     lambda g: g.nlargest(1, 'mean_test_score'))

- # Parameters of pipelines can be set using ‘__’ separated parameter names:
- estimator = GridSearchCV(pipe,
-                          dict(pca__n_components=n_components,
-                               logistic__C=Cs), cv=5)
- estimator.fit(X_digits, y_digits)
+ best_clfs.plot(x=components_col, y='mean_test_score', yerr='std_test_score',
+                legend=False, ax=ax1)
+ ax1.set_ylabel('Classification accuracy (val)')
+ ax1.set_xlabel('n_components')

- plt.axvline(estimator.best_estimator_.named_steps['pca'].n_components,
-             linestyle=':', label='n_components chosen')
- plt.legend(prop=dict(size=12))
+ plt.tight_layout()
plt.show()
15 changes: 7 additions & 8 deletions examples/ensemble/plot_feature_transformation.py
@@ -42,19 +42,19 @@
n_estimator = 10
X, y = make_classification(n_samples=80000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# It is important to train the ensemble of trees on a different subset
# of the training data than the linear regression model to avoid
# overfitting, in particular if the total number of leaves is
# similar to the number of training samples
- X_train, X_train_lr, y_train, y_train_lr = train_test_split(X_train,
-                                                              y_train,
-                                                              test_size=0.5)
+ X_train, X_train_lr, y_train, y_train_lr = train_test_split(
+     X_train, y_train, test_size=0.5)

# Unsupervised transformation based on totally random trees
rt = RandomTreesEmbedding(max_depth=3, n_estimators=n_estimator,
random_state=0)

- rt_lm = LogisticRegression()
+ rt_lm = LogisticRegression(solver='lbfgs', max_iter=1000)
pipeline = make_pipeline(rt, rt_lm)
pipeline.fit(X_train, y_train)
y_pred_rt = pipeline.predict_proba(X_test)[:, 1]
@@ -63,17 +63,18 @@
# Supervised transformation based on random forests
rf = RandomForestClassifier(max_depth=3, n_estimators=n_estimator)
rf_enc = OneHotEncoder(categories='auto')
- rf_lm = LogisticRegression()
+ rf_lm = LogisticRegression(solver='lbfgs', max_iter=1000)
rf.fit(X_train, y_train)
rf_enc.fit(rf.apply(X_train))
rf_lm.fit(rf_enc.transform(rf.apply(X_train_lr)), y_train_lr)

y_pred_rf_lm = rf_lm.predict_proba(rf_enc.transform(rf.apply(X_test)))[:, 1]
fpr_rf_lm, tpr_rf_lm, _ = roc_curve(y_test, y_pred_rf_lm)

# Supervised transformation based on gradient boosted trees
grd = GradientBoostingClassifier(n_estimators=n_estimator)
grd_enc = OneHotEncoder(categories='auto')
- grd_lm = LogisticRegression()
+ grd_lm = LogisticRegression(solver='lbfgs', max_iter=1000)
grd.fit(X_train, y_train)
grd_enc.fit(grd.apply(X_train)[:, :, 0])
grd_lm.fit(grd_enc.transform(grd.apply(X_train_lr)[:, :, 0]), y_train_lr)
@@ -82,12 +83,10 @@
grd_enc.transform(grd.apply(X_test)[:, :, 0]))[:, 1]
fpr_grd_lm, tpr_grd_lm, _ = roc_curve(y_test, y_pred_grd_lm)


# The gradient boosted model by itself
y_pred_grd = grd.predict_proba(X_test)[:, 1]
fpr_grd, tpr_grd, _ = roc_curve(y_test, y_pred_grd)


# The random forest model by itself
y_pred_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf)
3 changes: 2 additions & 1 deletion examples/ensemble/plot_voting_probas.py
@@ -29,7 +29,7 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

- clf1 = LogisticRegression(random_state=123)
+ clf1 = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=123)
clf2 = RandomForestClassifier(n_estimators=100, random_state=123)
clf3 = GaussianNB()
X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
@@ -79,4 +79,5 @@
plt.ylim([0, 1])
plt.title('Class probabilities for sample 1 by different classifiers')
plt.legend([p1[0], p2[0]], ['class 1', 'class 2'], loc='upper left')
+ plt.tight_layout()
plt.show()
5 changes: 3 additions & 2 deletions examples/exercises/plot_digits_classification_exercise.py
@@ -15,7 +15,7 @@
from sklearn import datasets, neighbors, linear_model

digits = datasets.load_digits()
- X_digits = digits.data
+ X_digits = digits.data / digits.data.max()
y_digits = digits.target

n_samples = len(X_digits)
@@ -26,7 +26,8 @@
y_test = y_digits[int(.9 * n_samples):]

knn = neighbors.KNeighborsClassifier()
- logistic = linear_model.LogisticRegression()
+ logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=1000,
+                                            multi_class='multinomial')

print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test))
print('LogisticRegression score: %f'
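Besides the solver arguments, note the scaling change above: dividing by digits.data.max() maps the pixel features (originally in [0, 16]) into [0, 1], which is what lets lbfgs converge within max_iter; unscaled data tends to trigger convergence warnings instead. A hedged sketch of the same idea using a transformer (per-feature min-max scaling rather than the global divide used in the diff):

from sklearn import datasets, linear_model
from sklearn.preprocessing import MinMaxScaler

digits = datasets.load_digits()

# Scale each feature to [0, 1]; similar in effect to dividing by the global max.
X = MinMaxScaler().fit_transform(digits.data)
logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=1000,
                                           multi_class='multinomial')
print(logistic.fit(X, digits.target).score(X, digits.target))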
14 changes: 7 additions & 7 deletions examples/linear_model/plot_iris_logistic.py
@@ -7,29 +7,28 @@
=========================================================

Show below is a logistic-regression classifiers decision boundaries on the
- `iris <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_ dataset. The
- datapoints are colored according to their labels.
+ first two dimensions (sepal length and width) of the `iris
+ <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_ dataset. The datapoints
+ are colored according to their labels.

"""
print(__doc__)


# Code source: Gaël Varoquaux
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
- from sklearn import linear_model, datasets
+ from sklearn.linear_model import LogisticRegression
+ from sklearn import datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features.
Y = iris.target

- h = .02  # step size in the mesh
-
- logreg = linear_model.LogisticRegression(C=1e5)
+ logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')

# we create an instance of Neighbours Classifier and fit the data.
logreg.fit(X, Y)
@@ -38,6 +37,7 @@
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
+ h = .02  # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

11 changes: 7 additions & 4 deletions examples/linear_model/plot_logistic.py
@@ -23,8 +23,7 @@

from sklearn import linear_model

- # this is our test set, it's just a straight line with some
- # Gaussian noise
+ # Generate a toy dataset: it's just a straight line with some Gaussian noise:
xmin, xmax = -5, 5
n_samples = 100
np.random.seed(0)
@@ -34,8 +33,9 @@
X += .3 * np.random.normal(size=n_samples)

X = X[:, np.newaxis]
- # run the classifier
- clf = linear_model.LogisticRegression(C=1e5)
+
+ # Fit the classifier
+ clf = linear_model.LogisticRegression(C=1e5, solver='lbfgs')
clf.fit(X, y)

# and plot the result
@@ -47,6 +47,8 @@

def model(x):
return 1 / (1 + np.exp(-x))


loss = model(X_test * clf.coef_ + clf.intercept_).ravel()
plt.plot(X_test, loss, color='red', linewidth=3)

@@ -63,4 +65,5 @@ def model(x):
plt.xlim(-4, 10)
plt.legend(('Logistic Regression Model', 'Linear Regression Model'),
loc="lower right", fontsize='small')
+ plt.tight_layout()
plt.show()