From 7c728e08c5dfe82f0f82fe9ef6d6620c5f3e3471 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?=
 <eguenther@MacBook-Pro-von-Elisabeth.local>
Date: Tue, 1 Aug 2023 20:02:24 +0200
Subject: [PATCH 01/22] typo

---
 examples/datasets/plot_iris_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 16edcdf37b70d..9a7c0c3c13b77 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -48,7 +48,7 @@
 plt.xticks(())
 plt.yticks(())
 
-# To getter a better understanding of interaction of the dimensions
+# To get a better understanding of interaction of the dimensions
 # plot the first three PCA dimensions
 fig = plt.figure(1, figsize=(8, 6))
 ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)

From b289185e27b8aa8313df409c46981b4865ae7f36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?=
 <eguenther@MacBook-Pro-von-Elisabeth.local>
Date: Tue, 1 Aug 2023 20:41:04 +0200
Subject: [PATCH 02/22] add link to the example

---
 sklearn/datasets/_base.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index c95c43a8ab942..ae9388679039a 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -667,6 +667,11 @@ def load_iris(*, return_X_y=False, as_frame=False):
     array([0, 0, 1])
     >>> list(data.target_names)
     ['setosa', 'versicolor', 'virginica']
+
+    See :ref:`sphx_glr_auto_examples_datasets_plot_iris_dataset.py` for a more
+    detailed example of how to work with the iris dataset.
+
+
     """
     data_file_name = "iris.csv"
     data, target, target_names, fdescr = load_csv_data(

From c6b6026ecb02a6ae802d1609ddc9083fc7954b3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Tue, 1 Aug 2023 20:54:03 +0200
Subject: [PATCH 03/22] adapt wording: pca dimension instead of direction

---
 examples/datasets/plot_iris_dataset.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 9a7c0c3c13b77..e88fa6d8f48b5 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -48,6 +48,9 @@
 plt.xticks(())
 plt.yticks(())
 
+# %%
+# Plot a PCA representation of the iris dataset
+# -------------------------------------------------------------
 # To get a better understanding of interaction of the dimensions
 # plot the first three PCA dimensions
 fig = plt.figure(1, figsize=(8, 6))
@@ -64,7 +67,7 @@
     s=40,
 )
 
-ax.set_title("First three PCA directions")
+ax.set_title("First three PCA dimensions")
 ax.set_xlabel("1st eigenvector")
 ax.xaxis.set_ticklabels([])
 ax.set_ylabel("2nd eigenvector")

From ea18244870c10cee130794f9ce3927463f315728 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Tue, 1 Aug 2023 21:01:32 +0200
Subject: [PATCH 04/22] adapt PCA section

---
 examples/datasets/plot_iris_dataset.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index e88fa6d8f48b5..721e03855313c 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -19,6 +19,9 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause
 
+# %%
+# Setup: Import the data and prepare the plot
+# -------------------------------------------------
 import matplotlib.pyplot as plt
 
 # unused but required import for doing 3d projections with matplotlib < 3.2
@@ -35,6 +38,10 @@
 x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
 y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
 
+
+# %%
+# Scatter Plot of the Iris datasett
+# -------------------------------------------------
 plt.figure(2, figsize=(8, 6))
 plt.clf()
 
@@ -49,10 +56,11 @@
 plt.yticks(())
 
 # %%
-# Plot a PCA representation of the iris dataset
+# Plot a PCA representation
 # -------------------------------------------------------------
-# To get a better understanding of interaction of the dimensions
-# plot the first three PCA dimensions
+# Let's apply a PCA to the iris dataset and then plot the first three
+# dimensions.
+# This will give us a better understanding of our analysis results.
 fig = plt.figure(1, figsize=(8, 6))
 ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)
 
@@ -76,3 +84,5 @@
 ax.zaxis.set_ticklabels([])
 
 plt.show()
+
+# %%

From edec86910c8544f15f8356b2d0066cac5cc9ef6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Tue, 1 Aug 2023 21:16:18 +0200
Subject: [PATCH 05/22] adapt format and correct a typo

---
 examples/datasets/plot_iris_dataset.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 721e03855313c..2f4b4378a80da 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -40,8 +40,8 @@
 
 
 # %%
-# Scatter Plot of the Iris datasett
-# -------------------------------------------------
+# Scatter Plot of the Iris dataset
+# --------------------------------
 plt.figure(2, figsize=(8, 6))
 plt.clf()
 
@@ -57,7 +57,7 @@
 
 # %%
 # Plot a PCA representation
-# -------------------------------------------------------------
+# -------------------------
 # Let's apply a PCA to the iris dataset and then plot the first three
 # dimensions.
 # This will give us a better understanding of our analysis results.

From 2e0d8ed1eba8406ed176cc2869517e7c4fa347f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Wed, 9 Aug 2023 12:26:44 +0200
Subject: [PATCH 06/22] fix docstring

---
 sklearn/datasets/_base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index ae9388679039a..6d995c1f7c4bf 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -671,7 +671,6 @@ def load_iris(*, return_X_y=False, as_frame=False):
     See :ref:`sphx_glr_auto_examples_datasets_plot_iris_dataset.py` for a more
     detailed example of how to work with the iris dataset.
 
-
     """
     data_file_name = "iris.csv"
     data, target, target_names, fdescr = load_csv_data(

From 0f2e5dd1a404ae30f1a8331798e5511f4136813f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Wed, 9 Aug 2023 12:55:16 +0200
Subject: [PATCH 07/22] add explanations to the plots

---
 examples/datasets/plot_iris_dataset.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 2f4b4378a80da..623d66973a3ab 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -55,6 +55,14 @@
 plt.xticks(())
 plt.yticks(())
 
+# Each point in the scatter plot refers to one of the 150 iris flowers
+# in the dataset, with the color indicating their respective type
+# (Setosa, Versicolour, and Virginica).
+# Just based on the the 2 dimensions used in this plot - sepal width
+# and sepal length - you can already see a patttern, but there's still
+# overlap. Let's apply a PCA analysis to bettter differentiatte between
+# the three types!
+
 # %%
 # Plot a PCA representation
 # -------------------------
@@ -85,4 +93,9 @@
 
 plt.show()
 
+# We've now applied a PCA analysis and plotted the irises
+# along the first three dimensions (= Eigenvectors).
+# Looks like the first dimension already does a prettty good job
+# in differentiating the types of irises!
+
 # %%

From d096b26d31d0c826e2dba93144a96dcdd72b0846 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 15:50:28 +0200
Subject: [PATCH 08/22] remove empty line in docstring (as suggested by
 guillaume)

---
 sklearn/datasets/_base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index 6d995c1f7c4bf..e6ef5d97d97b9 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -670,7 +670,6 @@ def load_iris(*, return_X_y=False, as_frame=False):
 
     See :ref:`sphx_glr_auto_examples_datasets_plot_iris_dataset.py` for a more
     detailed example of how to work with the iris dataset.
-
     """
     data_file_name = "iris.csv"
     data, target, target_names, fdescr = load_csv_data(

From f81572b25efac10bc7b5fc9ca615f926bed674ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 15:54:04 +0200
Subject: [PATCH 09/22] adapt divider lines to fit text length (as suggested by
 Guillaume)

---
 examples/datasets/plot_iris_dataset.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 623d66973a3ab..18242dfc6abe4 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -1,7 +1,7 @@
 """
-=========================================================
+================
 The Iris Dataset
-=========================================================
+================
 This data sets consists of 3 different types of irises'
 (Setosa, Versicolour, and Virginica) petal and sepal
 length, stored in a 150x4 numpy.ndarray
@@ -21,7 +21,7 @@
 
 # %%
 # Setup: Import the data and prepare the plot
-# -------------------------------------------------
+# -------------------------------------------
 import matplotlib.pyplot as plt
 
 # unused but required import for doing 3d projections with matplotlib < 3.2

From a3d2bc91542106e071ac66b98062158260ca1238 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 15:56:44 +0200
Subject: [PATCH 10/22] move matplotlib import into the cell where it is used
 the first time (as suggested by Guillaume)

---
 examples/datasets/plot_iris_dataset.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 18242dfc6abe4..e067fe12c0aac 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -22,11 +22,6 @@
 # %%
 # Setup: Import the data and prepare the plot
 # -------------------------------------------
-import matplotlib.pyplot as plt
-
-# unused but required import for doing 3d projections with matplotlib < 3.2
-import mpl_toolkits.mplot3d  # noqa: F401
-
 from sklearn import datasets
 from sklearn.decomposition import PCA
 
@@ -42,6 +37,11 @@
 # %%
 # Scatter Plot of the Iris dataset
 # --------------------------------
+import matplotlib.pyplot as plt  # noqa: E402
+
+# unused but required import for doing 3d projections with matplotlib < 3.2
+import mpl_toolkits.mplot3d  # noqa: F401, E402
+
 plt.figure(2, figsize=(8, 6))
 plt.clf()
 

From 3f8731e0f7194e87d5b4958875585b35024312cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 15:59:15 +0200
Subject: [PATCH 11/22] move PCA import into the cell where it is used the
 first time (as suggested by Guillaume)

---
 examples/datasets/plot_iris_dataset.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index e067fe12c0aac..413b09bf13c06 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -23,7 +23,6 @@
 # Setup: Import the data and prepare the plot
 # -------------------------------------------
 from sklearn import datasets
-from sklearn.decomposition import PCA
 
 # import some data to play with
 iris = datasets.load_iris()
@@ -69,6 +68,8 @@
 # Let's apply a PCA to the iris dataset and then plot the first three
 # dimensions.
 # This will give us a better understanding of our analysis results.
+from sklearn.decomposition import PCA  # noqa: E402
+
 fig = plt.figure(1, figsize=(8, 6))
 ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)
 

From d752fe656703e0c9cd7318c62d37c862351b0b75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 16:19:39 +0200
Subject: [PATCH 12/22] clean up code for better readability. First plot does
 not need X and y, assigning them is confusing. Also stick to default colors
 to make example easier

---
 examples/datasets/plot_iris_dataset.py | 39 ++++++++++----------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 413b09bf13c06..196dfe67ba284 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -20,17 +20,11 @@
 # License: BSD 3 clause
 
 # %%
-# Setup: Import the data and prepare the plot
-# -------------------------------------------
+# Setup: Import the iris dataset
+# ------------------------------
 from sklearn import datasets
 
-# import some data to play with
 iris = datasets.load_iris()
-X = iris.data[:, :2]  # we only take the first two features.
-y = iris.target
-
-x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
-y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
 
 
 # %%
@@ -41,19 +35,18 @@
 # unused but required import for doing 3d projections with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401, E402
 
+# Prepare the plot size
 plt.figure(2, figsize=(8, 6))
 plt.clf()
 
-# Plot the training points
-plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor="k")
-plt.xlabel("Sepal length")
-plt.ylabel("Sepal width")
+# Plot the training points across the 1st and 2nd feature
+# (sepal length and sepal width)
+plt.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)
+plt.xlabel("Sepal Length")
+plt.ylabel("Sepal Width")
 
-plt.xlim(x_min, x_max)
-plt.ylim(y_min, y_max)
 plt.xticks(())
 plt.yticks(())
-
 # Each point in the scatter plot refers to one of the 150 iris flowers
 # in the dataset, with the color indicating their respective type
 # (Setosa, Versicolour, and Virginica).
@@ -65,8 +58,8 @@
 # %%
 # Plot a PCA representation
 # -------------------------
-# Let's apply a PCA to the iris dataset and then plot the first three
-# dimensions.
+# Let's apply a PCA to the iris dataset and then plot the irises
+# across the first three PCA dimensions.
 # This will give us a better understanding of our analysis results.
 from sklearn.decomposition import PCA  # noqa: E402
 
@@ -78,25 +71,23 @@
     X_reduced[:, 0],
     X_reduced[:, 1],
     X_reduced[:, 2],
-    c=y,
-    cmap=plt.cm.Set1,
-    edgecolor="k",
+    c=iris.target,
     s=40,
 )
 
 ax.set_title("First three PCA dimensions")
-ax.set_xlabel("1st eigenvector")
+ax.set_xlabel("1st Eigenvector")
 ax.xaxis.set_ticklabels([])
-ax.set_ylabel("2nd eigenvector")
+ax.set_ylabel("2nd Eigenvector")
 ax.yaxis.set_ticklabels([])
-ax.set_zlabel("3rd eigenvector")
+ax.set_zlabel("3rd Eigenvector")
 ax.zaxis.set_ticklabels([])
 
 plt.show()
 
 # We've now applied a PCA analysis and plotted the irises
 # along the first three dimensions (= Eigenvectors).
-# Looks like the first dimension already does a prettty good job
+# Looks like the first dimension already does a pretty good job
 # in differentiating the types of irises!
 
 # %%

From 7b3dc5efeddbc79bf6d8fbdb79da0abed8216a54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 16:20:38 +0200
Subject: [PATCH 13/22] remove typos: the 't' key on my keyboard is stuck :-/

---
 examples/datasets/plot_iris_dataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 196dfe67ba284..038b4b152aba4 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -51,8 +51,8 @@
 # in the dataset, with the color indicating their respective type
 # (Setosa, Versicolour, and Virginica).
 # Just based on the the 2 dimensions used in this plot - sepal width
-# and sepal length - you can already see a patttern, but there's still
-# overlap. Let's apply a PCA analysis to bettter differentiatte between
+# and sepal length - you can already see a pattern, but there's still
+# overlap. Let's apply a PCA analysis to better differentiate between
 # the three types!
 
 # %%

From 259a009da58ec05260b737d04626008fdf0ce239 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 16:25:45 +0200
Subject: [PATCH 14/22] adapt text for PCA (intro and plot description), as
 suggested by Guillaume

---
 examples/datasets/plot_iris_dataset.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 038b4b152aba4..5c1cb6ec94e69 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -52,15 +52,14 @@
 # (Setosa, Versicolour, and Virginica).
 # Just based on the the 2 dimensions used in this plot - sepal width
 # and sepal length - you can already see a pattern, but there's still
-# overlap. Let's apply a PCA analysis to better differentiate between
-# the three types!
+# overlap.
 
 # %%
 # Plot a PCA representation
 # -------------------------
-# Let's apply a PCA to the iris dataset and then plot the irises
-# across the first three PCA dimensions.
-# This will give us a better understanding of our analysis results.
+# Let's apply a Principal Component Analysis (PCA) to the iris dataset
+# and then plot the irises across the first three PCA dimensions.
+# This will allow us to better differentiate between the three types!
 from sklearn.decomposition import PCA  # noqa: E402
 
 fig = plt.figure(1, figsize=(8, 6))
@@ -84,10 +83,8 @@
 ax.zaxis.set_ticklabels([])
 
 plt.show()
-
-# We've now applied a PCA analysis and plotted the irises
-# along the first three dimensions (= Eigenvectors).
-# Looks like the first dimension already does a pretty good job
-# in differentiating the types of irises!
-
 # %%
+# PCA will create 3 new features that are a linear combination of the
+# 4 original features. In addition, this transform maximizes the variance.
+# With this transformation, we see that we can identify each species using
+# only the first feature (i.e. the first eigenvector).

From cfe9790b379d3e964710bae2bf76ae872e8abfea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 16:28:01 +0200
Subject: [PATCH 15/22] adapt title for first cell

---
 examples/datasets/plot_iris_dataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 5c1cb6ec94e69..85d8123de7d3d 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -20,8 +20,8 @@
 # License: BSD 3 clause
 
 # %%
-# Setup: Import the iris dataset
-# ------------------------------
+# Loading the iris dataset
+# ------------------------
 from sklearn import datasets
 
 iris = datasets.load_iris()

From 326bfd5c6f6f72c532501e7cb0ecd0088bfc777f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 16:38:30 +0200
Subject: [PATCH 16/22] adjust scatter plot to Guillaume's suggestion

---
 examples/datasets/plot_iris_dataset.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 85d8123de7d3d..50ade6066e8c5 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -41,12 +41,12 @@
 
 # Plot the training points across the 1st and 2nd feature
 # (sepal length and sepal width)
-plt.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)
-plt.xlabel("Sepal Length")
-plt.ylabel("Sepal Width")
-
-plt.xticks(())
-plt.yticks(())
+_, ax = plt.subplots()
+scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)
+ax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1])
+_ = ax.legend(
+    scatter.legend_elements()[0], iris.target_names, loc="lower right", title="Classes"
+)
 # Each point in the scatter plot refers to one of the 150 iris flowers
 # in the dataset, with the color indicating their respective type
 # (Setosa, Versicolour, and Virginica).

From 1ff67e0fb67df48d41c9566996d406f68737b47b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 16:45:07 +0200
Subject: [PATCH 17/22] improve text for the scatter plot: now that we have
 added a legend, we can better describe the types

---
 examples/datasets/plot_iris_dataset.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 50ade6066e8c5..b714579076b74 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -50,9 +50,10 @@
 # Each point in the scatter plot refers to one of the 150 iris flowers
 # in the dataset, with the color indicating their respective type
 # (Setosa, Versicolour, and Virginica).
-# Just based on the the 2 dimensions used in this plot - sepal width
-# and sepal length - you can already see a pattern, but there's still
-# overlap.
+# You can already see a pattern regarding the Setosa type, which is
+# easily identifiable based on its short and wide sepal. Only
+# considering these 2 dimensions, sepal width and length, there's still
+# overlap between the Versicolor and Virginica types.
 
 # %%
 # Plot a PCA representation

From db80506d516bdc6455bf657d94a246d44499ab27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Sat, 12 Aug 2023 16:53:18 +0200
Subject: [PATCH 18/22] trying to fix the failed checks: add empty line before
 next cell

---
 examples/datasets/plot_iris_dataset.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index b714579076b74..24eaf6a7ae3fd 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -84,6 +84,7 @@
 ax.zaxis.set_ticklabels([])
 
 plt.show()
+
 # %%
 # PCA will create 3 new features that are a linear combination of the
 # 4 original features. In addition, this transform maximizes the variance.

From 35464844a4d990ed506c6779ac1e948bf49de94f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Tue, 5 Sep 2023 21:39:00 +0200
Subject: [PATCH 19/22] add new cell for comments

---
 examples/datasets/plot_iris_dataset.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 24eaf6a7ae3fd..dd7235cd5913e 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -47,6 +47,8 @@
 _ = ax.legend(
     scatter.legend_elements()[0], iris.target_names, loc="lower right", title="Classes"
 )
+
+# %%
 # Each point in the scatter plot refers to one of the 150 iris flowers
 # in the dataset, with the color indicating their respective type
 # (Setosa, Versicolour, and Virginica).

From d83f0e5a7cb144ac7d2e0b1d420980b6cd18bfb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Tue, 5 Sep 2023 21:39:34 +0200
Subject: [PATCH 20/22] remove the part where the plot size was specified, we
 don't need it

---
 examples/datasets/plot_iris_dataset.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index dd7235cd5913e..f6966734f104d 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -35,10 +35,6 @@
 # unused but required import for doing 3d projections with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401, E402
 
-# Prepare the plot size
-plt.figure(2, figsize=(8, 6))
-plt.clf()
-
 # Plot the training points across the 1st and 2nd feature
 # (sepal length and sepal width)
 _, ax = plt.subplots()

From 57b5be97d94849f3ff0443b67a6cbde33b04f035 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= <eguenther@posteo.de>
Date: Tue, 5 Sep 2023 21:44:22 +0200
Subject: [PATCH 21/22] add noqa because import is not done at the top of the
 file!

---
 examples/datasets/plot_iris_dataset.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index f6966734f104d..3611762061ebe 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -32,11 +32,6 @@
 # --------------------------------
 import matplotlib.pyplot as plt  # noqa: E402
 
-# unused but required import for doing 3d projections with matplotlib < 3.2
-import mpl_toolkits.mplot3d  # noqa: F401, E402
-
-# Plot the training points across the 1st and 2nd feature
-# (sepal length and sepal width)
 _, ax = plt.subplots()
 scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)
 ax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1])
@@ -59,6 +54,10 @@
 # Let's apply a Principal Component Analysis (PCA) to the iris dataset
 # and then plot the irises across the first three PCA dimensions.
 # This will allow us to better differentiate between the three types!
+
+# unused but required import for doing 3d projections with matplotlib < 3.2
+import mpl_toolkits.mplot3d  # noqa: F401, E402
+
 from sklearn.decomposition import PCA  # noqa: E402
 
 fig = plt.figure(1, figsize=(8, 6))

From 474e8cc7b71da25135fe34a312fcd2e48e82bea2 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 7 Sep 2023 16:03:59 +0200
Subject: [PATCH 22/22] MAINT remove unnecessary check

---
 examples/datasets/plot_iris_dataset.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py
index 3611762061ebe..32aba8918547e 100644
--- a/examples/datasets/plot_iris_dataset.py
+++ b/examples/datasets/plot_iris_dataset.py
@@ -30,7 +30,7 @@
 # %%
 # Scatter Plot of the Iris dataset
 # --------------------------------
-import matplotlib.pyplot as plt  # noqa: E402
+import matplotlib.pyplot as plt
 
 _, ax = plt.subplots()
 scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target)
@@ -56,9 +56,9 @@
 # This will allow us to better differentiate between the three types!
 
 # unused but required import for doing 3d projections with matplotlib < 3.2
-import mpl_toolkits.mplot3d  # noqa: F401, E402
+import mpl_toolkits.mplot3d  # noqa: F401
 
-from sklearn.decomposition import PCA  # noqa: E402
+from sklearn.decomposition import PCA
 
 fig = plt.figure(1, figsize=(8, 6))
 ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)