From cccb39038e102c27781eb8a6ee8a32bf7be9e327 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Sun, 14 Nov 2021 11:19:53 +0100
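Subject: [PATCH 1/5] Speed up plot_discretization_classification.py

Reduce the runtime of the example by narrowing the parameter grids
passed to GridSearchCV (C, n_bins, learning_rate) and lowering
GradientBoostingClassifier's n_estimators from 50 to 5.

Other changes in this patch:

- StandardScaler is no longer applied to each dataset before the
  train/test split; it is now the first step of every estimator
  pipeline, and "StandardScaler + " is stripped from the generated
  names.
- %-style string formatting is replaced with f-strings.
- np.c_ is replaced with np.column_stack.
- The colormap variable cm is renamed to cm_piyg.

---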
 .../plot_discretization_classification.py     | 58 ++++++++++++-------
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index 59cdbdb766a12..f1a0e89d1f308 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -62,32 +62,49 @@ def get_name(estimator):
 
 # list of (estimator, param_grid), where param_grid is used in GridSearchCV
 classifiers = [
-    (LogisticRegression(random_state=0), {"C": np.logspace(-2, 7, 10)}),
-    (LinearSVC(random_state=0), {"C": np.logspace(-2, 7, 10)}),
+    (
+        make_pipeline(StandardScaler(), LogisticRegression(random_state=0)),
+        {"logisticregression__C": np.logspace(-1, 1, 3)},
+    ),
+    (
+        make_pipeline(StandardScaler(), LinearSVC(random_state=0)),
+        {"linearsvc__C": np.logspace(-1, 1, 3)},
+    ),
     (
         make_pipeline(
-            KBinsDiscretizer(encode="onehot"), LogisticRegression(random_state=0)
+            StandardScaler(),
+            KBinsDiscretizer(encode="onehot"),
+            LogisticRegression(random_state=0),
         ),
         {
-            "kbinsdiscretizer__n_bins": np.arange(2, 10),
-            "logisticregression__C": np.logspace(-2, 7, 10),
+            "kbinsdiscretizer__n_bins": np.arange(5, 8),
+            "logisticregression__C": np.logspace(-1, 1, 3),
         },
     ),
     (
-        make_pipeline(KBinsDiscretizer(encode="onehot"), LinearSVC(random_state=0)),
+        make_pipeline(
+            StandardScaler(),
+            KBinsDiscretizer(encode="onehot"),
+            LinearSVC(random_state=0),
+        ),
         {
-            "kbinsdiscretizer__n_bins": np.arange(2, 10),
-            "linearsvc__C": np.logspace(-2, 7, 10),
+            "kbinsdiscretizer__n_bins": np.arange(5, 8),
+            "linearsvc__C": np.logspace(-1, 1, 3),
         },
     ),
     (
-        GradientBoostingClassifier(n_estimators=50, random_state=0),
-        {"learning_rate": np.logspace(-4, 0, 10)},
+        make_pipeline(
+            StandardScaler(), GradientBoostingClassifier(n_estimators=5, random_state=0)
+        ),
+        {"gradientboostingclassifier__learning_rate": np.logspace(-2, 0, 5)},
+    ),
+    (
+        make_pipeline(StandardScaler(), SVC(random_state=0)),
+        {"svc__C": np.logspace(-1, 1, 3)},
     ),
-    (SVC(random_state=0), {"C": np.logspace(-2, 7, 10)}),
 ]
 
-names = [get_name(e) for e, g in classifiers]
+names = [get_name(e).replace("StandardScaler + ", "") for e, _ in classifiers]
 
 n_samples = 100
 datasets = [
@@ -107,15 +124,14 @@ def get_name(estimator):
     nrows=len(datasets), ncols=len(classifiers) + 1, figsize=(21, 9)
 )
 
-cm = plt.cm.PiYG
+cm_piyg = plt.cm.PiYG
 cm_bright = ListedColormap(["#b30065", "#178000"])
 
 # iterate over datasets
 for ds_cnt, (X, y) in enumerate(datasets):
-    print("\ndataset %d\n---------" % ds_cnt)
+    print(f"\ndataset {ds_cnt}\n---------")
 
-    # preprocess dataset, split into training and test part
-    X = StandardScaler().fit_transform(X)
+    # split into training and test part
     X_train, X_test, y_train, y_test = train_test_split(
         X, y, test_size=0.5, random_state=42
     )
@@ -148,18 +164,18 @@ def get_name(estimator):
         with ignore_warnings(category=ConvergenceWarning):
             clf.fit(X_train, y_train)
         score = clf.score(X_test, y_test)
-        print("%s: %.2f" % (name, score))
+        print(f"{name}: {score:.2f}")
 
         # plot the decision boundary. For that, we will assign a color to each
         # point in the mesh [x_min, x_max]*[y_min, y_max].
         if hasattr(clf, "decision_function"):
-            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
+            Z = clf.decision_function(np.column_stack([xx.ravel(), yy.ravel()]))
         else:
-            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
+            Z = clf.predict_proba(np.column_stack([xx.ravel(), yy.ravel()]))[:, 1]
 
         # put the result into a color plot
         Z = Z.reshape(xx.shape)
-        ax.contourf(xx, yy, Z, cmap=cm, alpha=0.8)
+        ax.contourf(xx, yy, Z, cmap=cm_piyg, alpha=0.8)
 
         # plot the training points
         ax.scatter(
@@ -184,7 +200,7 @@ def get_name(estimator):
         ax.text(
             0.95,
             0.06,
-            ("%.2f" % score).lstrip("0"),
+            (f"{score:.2f}").lstrip("0"),
             size=15,
             bbox=dict(boxstyle="round", alpha=0.8, facecolor="white"),
             transform=ax.transAxes,

From 8323bf7e26666c59a1d3d4d24bc97b1e34cea69e Mon Sep 17 00:00:00 2001
From: Xiao Yuan <yuanx749@gmail.com>
Date: Mon, 15 Nov 2021 16:16:55 +0100
Subject: [PATCH 2/5] Add comment explaining the narrowed parameter spaces

---
 examples/preprocessing/plot_discretization_classification.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index f1a0e89d1f308..1ca16cfcbe3e7 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -61,6 +61,8 @@ def get_name(estimator):
 
 
 # list of (estimator, param_grid), where param_grid is used in GridSearchCV
+# The parameter spaces in this example are limited to a narrow band in order to speed up.
+# In a real use case, a broader search space for the algorithms should be used.
 classifiers = [
     (
         make_pipeline(StandardScaler(), LogisticRegression(random_state=0)),

From aafb038eb8a64add8c24eb4bdb4850b8f5ae86a2 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Mon, 15 Nov 2021 16:27:52 +0100
Subject: [PATCH 3/5] Reduce length of comment

---
 examples/preprocessing/plot_discretization_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index 1ca16cfcbe3e7..09fc9712ff601 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -61,7 +61,7 @@ def get_name(estimator):
 
 
 # list of (estimator, param_grid), where param_grid is used in GridSearchCV
-# The parameter spaces in this example are limited to a narrow band in order to speed up.
+# The parameter spaces in this example are limited to a narrow band to speed up.
 # In a real use case, a broader search space for the algorithms should be used.
 classifiers = [
     (

From 69f5ff47e255c2d4638de2c6c9b8a64502ba3b89 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Mon, 15 Nov 2021 17:35:35 +0100
Subject: [PATCH 4/5] Reword parameter-space comment and move it above the list description

---
 .../preprocessing/plot_discretization_classification.py     | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index 09fc9712ff601..8cfd6d597b930 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -60,9 +60,11 @@ def get_name(estimator):
     return name
 
 
+# The parameter spaces in this example are limited to a narrow band to reduce
+# its runtime. In a real use case, a broader search space for the algorithms
+# should be used.
+
 # list of (estimator, param_grid), where param_grid is used in GridSearchCV
-# The parameter spaces in this example are limited to a narrow band to speed up.
-# In a real use case, a broader search space for the algorithms should be used.
 classifiers = [
     (
         make_pipeline(StandardScaler(), LogisticRegression(random_state=0)),

From fecca549e3f26359b74f4bdb9574ac1876a0bc50 Mon Sep 17 00:00:00 2001
From: yuanx749 <yuanx749@gmail.com>
Date: Mon, 15 Nov 2021 19:00:26 +0100
Subject: [PATCH 5/5] Merge main

---
 examples/preprocessing/plot_discretization_classification.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index 8cfd6d597b930..ff3d2973caff3 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -60,11 +60,10 @@ def get_name(estimator):
     return name
 
 
+# list of (estimator, param_grid), where param_grid is used in GridSearchCV
 # The parameter spaces in this example are limited to a narrow band to reduce
 # its runtime. In a real use case, a broader search space for the algorithms
 # should be used.
-
-# list of (estimator, param_grid), where param_grid is used in GridSearchCV
 classifiers = [
     (
         make_pipeline(StandardScaler(), LogisticRegression(random_state=0)),