examples/preprocessing/plot_discretization_classification.py: 61 changes (40 additions & 21 deletions)
@@ -61,33 +61,53 @@ def get_name(estimator):


 # list of (estimator, param_grid), where param_grid is used in GridSearchCV
+# The parameter spaces in this example are limited to a narrow band to reduce
+# its runtime. In a real use case, a broader search space for the algorithms
+# should be used.
 classifiers = [
-    (LogisticRegression(random_state=0), {"C": np.logspace(-2, 7, 10)}),
-    (LinearSVC(random_state=0), {"C": np.logspace(-2, 7, 10)}),
+    (
+        make_pipeline(StandardScaler(), LogisticRegression(random_state=0)),
+        {"logisticregression__C": np.logspace(-1, 1, 3)},
+    ),
+    (
+        make_pipeline(StandardScaler(), LinearSVC(random_state=0)),
+        {"linearsvc__C": np.logspace(-1, 1, 3)},
+    ),
     (
         make_pipeline(
-            KBinsDiscretizer(encode="onehot"), LogisticRegression(random_state=0)
+            StandardScaler(),
+            KBinsDiscretizer(encode="onehot"),
+            LogisticRegression(random_state=0),
         ),
         {
-            "kbinsdiscretizer__n_bins": np.arange(2, 10),
-            "logisticregression__C": np.logspace(-2, 7, 10),
+            "kbinsdiscretizer__n_bins": np.arange(5, 8),
+            "logisticregression__C": np.logspace(-1, 1, 3),
         },
     ),
     (
-        make_pipeline(KBinsDiscretizer(encode="onehot"), LinearSVC(random_state=0)),
+        make_pipeline(
+            StandardScaler(),
+            KBinsDiscretizer(encode="onehot"),
+            LinearSVC(random_state=0),
+        ),
         {
-            "kbinsdiscretizer__n_bins": np.arange(2, 10),
-            "linearsvc__C": np.logspace(-2, 7, 10),
+            "kbinsdiscretizer__n_bins": np.arange(5, 8),
+            "linearsvc__C": np.logspace(-1, 1, 3),
         },
     ),
     (
-        GradientBoostingClassifier(n_estimators=50, random_state=0),
-        {"learning_rate": np.logspace(-4, 0, 10)},
+        make_pipeline(
+            StandardScaler(), GradientBoostingClassifier(n_estimators=5, random_state=0)
+        ),
+        {"gradientboostingclassifier__learning_rate": np.logspace(-2, 0, 5)},
     ),
-    (SVC(random_state=0), {"C": np.logspace(-2, 7, 10)}),
+    (
+        make_pipeline(StandardScaler(), SVC(random_state=0)),
+        {"svc__C": np.logspace(-1, 1, 3)},
+    ),
 ]
 
-names = [get_name(e) for e, g in classifiers]
+names = [get_name(e).replace("StandardScaler + ", "") for e, _ in classifiers]

n_samples = 100
datasets = [
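Each `(estimator, param_grid)` pair above is consumed by a grid search in an unchanged part of the file that this hunk does not show. A minimal sketch of that role, assuming the `GridSearchCV` import and the `X_train`/`y_train` split that appear later in this diff:

```python
from sklearn.model_selection import GridSearchCV

# Illustration only: each estimator is tuned over its own parameter grid.
for estimator, param_grid in classifiers:
    clf = GridSearchCV(estimator=estimator, param_grid=param_grid)
    clf.fit(X_train, y_train)  # X_train/y_train come from the split shown below
```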
@@ -107,15 +127,14 @@ def get_name(estimator):
     nrows=len(datasets), ncols=len(classifiers) + 1, figsize=(21, 9)
 )
 
-cm = plt.cm.PiYG
+cm_piyg = plt.cm.PiYG
 cm_bright = ListedColormap(["#b30065", "#178000"])
 
 # iterate over datasets
 for ds_cnt, (X, y) in enumerate(datasets):
-    print("\ndataset %d\n---------" % ds_cnt)
+    print(f"\ndataset {ds_cnt}\n---------")
 
-    # preprocess dataset, split into training and test part
-    X = StandardScaler().fit_transform(X)
+    # split into training and test part
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, test_size=0.5, random_state=42
     )
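The removed `StandardScaler().fit_transform(X)` scaled the whole dataset before splitting, so the test points influenced the scaler's statistics. With the scaler inside each pipeline, scaling is fitted on the training split only. A minimal sketch of the pattern, reusing this example's imports:

```python
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Mean and standard deviation are now learned from X_train alone, so no
# information about X_test leaks into the preprocessing step.
pipe = make_pipeline(StandardScaler(), LinearSVC(random_state=0))
pipe.fit(X_train, y_train)
print(pipe.score(X_test, y_test))
```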
@@ -148,18 +167,18 @@ def get_name(estimator):
         with ignore_warnings(category=ConvergenceWarning):
             clf.fit(X_train, y_train)
         score = clf.score(X_test, y_test)
-        print("%s: %.2f" % (name, score))
+        print(f"{name}: {score:.2f}")
 
         # plot the decision boundary. For that, we will assign a color to each
         # point in the mesh [x_min, x_max]*[y_min, y_max].
         if hasattr(clf, "decision_function"):
-            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
+            Z = clf.decision_function(np.column_stack([xx.ravel(), yy.ravel()]))
         else:
-            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
+            Z = clf.predict_proba(np.column_stack([xx.ravel(), yy.ravel()]))[:, 1]
 
         # put the result into a color plot
         Z = Z.reshape(xx.shape)
-        ax.contourf(xx, yy, Z, cmap=cm, alpha=0.8)
+        ax.contourf(xx, yy, Z, cmap=cm_piyg, alpha=0.8)
 
         # plot the training points
         ax.scatter(
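`np.column_stack` is a functional equivalent of the `np.c_` indexing trick for the 1-D raveled grids used here; a quick check of that equivalence:

```python
import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([4.0, 5.0, 6.0])
# Both produce a (3, 2) array whose columns are a and b.
assert np.array_equal(np.c_[a, b], np.column_stack([a, b]))
```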
@@ -184,7 +203,7 @@ def get_name(estimator):
         ax.text(
             0.95,
             0.06,
-            ("%.2f" % score).lstrip("0"),
+            (f"{score:.2f}").lstrip("0"),
             size=15,
             bbox=dict(boxstyle="round", alpha=0.8, facecolor="white"),
             transform=ax.transAxes,
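The `.lstrip("0")` keeps each panel's score label compact: the f-string formats the score to two decimals and the leading zero is then stripped, e.g.:

```python
score = 0.87
print(f"{score:.2f}".lstrip("0"))  # prints ".87"
```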