From 7c728e08c5dfe82f0f82fe9ef6d6620c5f3e3471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 1 Aug 2023 20:02:24 +0200 Subject: [PATCH 01/22] typo --- examples/datasets/plot_iris_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 16edcdf37b70d..9a7c0c3c13b77 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -48,7 +48,7 @@ plt.xticks(()) plt.yticks(()) -# To getter a better understanding of interaction of the dimensions +# To get a better understanding of interaction of the dimensions # plot the first three PCA dimensions fig = plt.figure(1, figsize=(8, 6)) ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110) From b289185e27b8aa8313df409c46981b4865ae7f36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 1 Aug 2023 20:41:04 +0200 Subject: [PATCH 02/22] add link to the example --- sklearn/datasets/_base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index c95c43a8ab942..ae9388679039a 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -667,6 +667,11 @@ def load_iris(*, return_X_y=False, as_frame=False): array([0, 0, 1]) >>> list(data.target_names) ['setosa', 'versicolor', 'virginica'] + + See :ref:`sphx_glr_auto_examples_datasets_plot_iris_dataset.py` for a more + detailed example of how to work with the iris dataset. 
+ + """ data_file_name = "iris.csv" data, target, target_names, fdescr = load_csv_data( From c6b6026ecb02a6ae802d1609ddc9083fc7954b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 1 Aug 2023 20:54:03 +0200 Subject: [PATCH 03/22] adapt wording: pca dimension instead of direction --- examples/datasets/plot_iris_dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 9a7c0c3c13b77..e88fa6d8f48b5 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -48,6 +48,9 @@ plt.xticks(()) plt.yticks(()) +# %% +# Plot a PCA representation of the iris dataset +# ------------------------------------------------------------- # To get a better understanding of interaction of the dimensions # plot the first three PCA dimensions fig = plt.figure(1, figsize=(8, 6)) @@ -64,7 +67,7 @@ s=40, ) -ax.set_title("First three PCA directions") +ax.set_title("First three PCA dimensions") ax.set_xlabel("1st eigenvector") ax.xaxis.set_ticklabels([]) ax.set_ylabel("2nd eigenvector") From ea18244870c10cee130794f9ce3927463f315728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 1 Aug 2023 21:01:32 +0200 Subject: [PATCH 04/22] adapt PCA section --- examples/datasets/plot_iris_dataset.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index e88fa6d8f48b5..721e03855313c 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -19,6 +19,9 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause +# %% +# Setup: Import the data and prepare the plot +# ------------------------------------------------- import matplotlib.pyplot as plt # unused but required import for doing 3d projections with matplotlib < 3.2 @@ -35,6 
+38,10 @@ x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5 y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5 + +# %% +# Scatter Plot of the Iris datasett +# ------------------------------------------------- plt.figure(2, figsize=(8, 6)) plt.clf() @@ -49,10 +56,11 @@ plt.yticks(()) # %% -# Plot a PCA representation of the iris dataset +# Plot a PCA representation # ------------------------------------------------------------- -# To get a better understanding of interaction of the dimensions -# plot the first three PCA dimensions +# Let's apply a PCA to the iris dataset and then plot the first three +# dimensions. +# This will give us a better understanding of our analysis results. fig = plt.figure(1, figsize=(8, 6)) ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110) @@ -76,3 +84,5 @@ ax.zaxis.set_ticklabels([]) plt.show() + +# %% From edec86910c8544f15f8356b2d0066cac5cc9ef6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 1 Aug 2023 21:16:18 +0200 Subject: [PATCH 05/22] adapt format and correct a typo --- examples/datasets/plot_iris_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 721e03855313c..2f4b4378a80da 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -40,8 +40,8 @@ # %% -# Scatter Plot of the Iris datasett -# ------------------------------------------------- +# Scatter Plot of the Iris dataset +# -------------------------------- plt.figure(2, figsize=(8, 6)) plt.clf() @@ -57,7 +57,7 @@ # %% # Plot a PCA representation -# ------------------------------------------------------------- +# ------------------------- # Let's apply a PCA to the iris dataset and then plot the first three # dimensions. # This will give us a better understanding of our analysis results. 
From 2e0d8ed1eba8406ed176cc2869517e7c4fa347f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Wed, 9 Aug 2023 12:26:44 +0200 Subject: [PATCH 06/22] fix docstring --- sklearn/datasets/_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index ae9388679039a..6d995c1f7c4bf 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -671,7 +671,6 @@ def load_iris(*, return_X_y=False, as_frame=False): See :ref:`sphx_glr_auto_examples_datasets_plot_iris_dataset.py` for a more detailed example of how to work with the iris dataset. - """ data_file_name = "iris.csv" data, target, target_names, fdescr = load_csv_data( From 0f2e5dd1a404ae30f1a8331798e5511f4136813f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Wed, 9 Aug 2023 12:55:16 +0200 Subject: [PATCH 07/22] add explanations to the plots --- examples/datasets/plot_iris_dataset.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 2f4b4378a80da..623d66973a3ab 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -55,6 +55,14 @@ plt.xticks(()) plt.yticks(()) +# Each point in the scatter plot refers to one of the 150 iris flowers +# in the dataset, with the color indicating their respective type +# (Setosa, Versicolour, and Virginica). +# Just based on the the 2 dimensions used in this plot - sepal width +# and sepal length - you can already see a patttern, but there's still +# overlap. Let's apply a PCA analysis to bettter differentiatte between +# the three types! + # %% # Plot a PCA representation # ------------------------- @@ -85,4 +93,9 @@ plt.show() +# We've now applied a PCA analysis and plotted the irises +# along the first three dimensions (= Eigenvectors). 
+# Looks like the first dimension already does a prettty good job +# in differentiating the types of irises! + # %% From d096b26d31d0c826e2dba93144a96dcdd72b0846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 15:50:28 +0200 Subject: [PATCH 08/22] remove empty line in docstring (as suggested by guillaume) --- sklearn/datasets/_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index 6d995c1f7c4bf..e6ef5d97d97b9 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -670,7 +670,6 @@ def load_iris(*, return_X_y=False, as_frame=False): See :ref:`sphx_glr_auto_examples_datasets_plot_iris_dataset.py` for a more detailed example of how to work with the iris dataset. - """ data_file_name = "iris.csv" data, target, target_names, fdescr = load_csv_data( From f81572b25efac10bc7b5fc9ca615f926bed674ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 15:54:04 +0200 Subject: [PATCH 09/22] adapt divider lines to fit text length (as suggested by Guillaume) --- examples/datasets/plot_iris_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 623d66973a3ab..18242dfc6abe4 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -1,7 +1,7 @@ """ -========================================================= +================ The Iris Dataset -========================================================= +================ This data sets consists of 3 different types of irises' (Setosa, Versicolour, and Virginica) petal and sepal length, stored in a 150x4 numpy.ndarray @@ -21,7 +21,7 @@ # %% # Setup: Import the data and prepare the plot -# ------------------------------------------------- +# ------------------------------------------- import matplotlib.pyplot as plt # unused but 
required import for doing 3d projections with matplotlib < 3.2 From a3d2bc91542106e071ac66b98062158260ca1238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 15:56:44 +0200 Subject: [PATCH 10/22] move matplotlib import into the cell where it is used the first time (as suggested by Guillaume) --- examples/datasets/plot_iris_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 18242dfc6abe4..e067fe12c0aac 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -22,11 +22,6 @@ # %% # Setup: Import the data and prepare the plot # ------------------------------------------- -import matplotlib.pyplot as plt - -# unused but required import for doing 3d projections with matplotlib < 3.2 -import mpl_toolkits.mplot3d # noqa: F401 - from sklearn import datasets from sklearn.decomposition import PCA @@ -42,6 +37,11 @@ # %% # Scatter Plot of the Iris dataset # -------------------------------- +import matplotlib.pyplot as plt # noqa: E402 + +# unused but required import for doing 3d projections with matplotlib < 3.2 +import mpl_toolkits.mplot3d # noqa: F401, E402 + plt.figure(2, figsize=(8, 6)) plt.clf() From 3f8731e0f7194e87d5b4958875585b35024312cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 15:59:15 +0200 Subject: [PATCH 11/22] move PCA import into the cell where it is used the first time (as suggested by Guillaume) --- examples/datasets/plot_iris_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index e067fe12c0aac..413b09bf13c06 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -23,7 +23,6 @@ # Setup: Import the data and prepare the plot # 
------------------------------------------- from sklearn import datasets -from sklearn.decomposition import PCA # import some data to play with iris = datasets.load_iris() @@ -69,6 +68,8 @@ # Let's apply a PCA to the iris dataset and then plot the first three # dimensions. # This will give us a better understanding of our analysis results. +from sklearn.decomposition import PCA # noqa: E402 + fig = plt.figure(1, figsize=(8, 6)) ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110) From d752fe656703e0c9cd7318c62d37c862351b0b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 16:19:39 +0200 Subject: [PATCH 12/22] clean up code for better readability. First plot does not need X and y, assigning them is confusing. Also stick to default colors to make example easier --- examples/datasets/plot_iris_dataset.py | 39 ++++++++++---------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 413b09bf13c06..196dfe67ba284 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -20,17 +20,11 @@ # License: BSD 3 clause # %% -# Setup: Import the data and prepare the plot -# ------------------------------------------- +# Setup: Import the iris dataset +# ------------------------------ from sklearn import datasets -# import some data to play with iris = datasets.load_iris() -X = iris.data[:, :2] # we only take the first two features. 
-y = iris.target - -x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5 -y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5 # %% @@ -41,19 +35,18 @@ # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401, E402 +# Prepare the plot size plt.figure(2, figsize=(8, 6)) plt.clf() -# Plot the training points -plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor="k") -plt.xlabel("Sepal length") -plt.ylabel("Sepal width") +# Plot the training points across the 1st and 2nd feature +# (sepal length and sepal width) +plt.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target) +plt.xlabel("Sepal Length") +plt.ylabel("Sepal Width") -plt.xlim(x_min, x_max) -plt.ylim(y_min, y_max) plt.xticks(()) plt.yticks(()) - # Each point in the scatter plot refers to one of the 150 iris flowers # in the dataset, with the color indicating their respective type # (Setosa, Versicolour, and Virginica). @@ -65,8 +58,8 @@ # %% # Plot a PCA representation # ------------------------- -# Let's apply a PCA to the iris dataset and then plot the first three -# dimensions. +# Let's apply a PCA to the iris dataset and then plot the irises +# across the first three PCA dimensions. # This will give us a better understanding of our analysis results. from sklearn.decomposition import PCA # noqa: E402 @@ -78,25 +71,23 @@ X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], - c=y, - cmap=plt.cm.Set1, - edgecolor="k", + c=iris.target, s=40, ) ax.set_title("First three PCA dimensions") -ax.set_xlabel("1st eigenvector") +ax.set_xlabel("1st Eigenvector") ax.xaxis.set_ticklabels([]) -ax.set_ylabel("2nd eigenvector") +ax.set_ylabel("2nd Eigenvector") ax.yaxis.set_ticklabels([]) -ax.set_zlabel("3rd eigenvector") +ax.set_zlabel("3rd Eigenvector") ax.zaxis.set_ticklabels([]) plt.show() # We've now applied a PCA analysis and plotted the irises # along the first three dimensions (= Eigenvectors). 
-# Looks like the first dimension already does a prettty good job +# Looks like the first dimension already does a pretty good job # in differentiating the types of irises! # %% From 7b3dc5efeddbc79bf6d8fbdb79da0abed8216a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 16:20:38 +0200 Subject: [PATCH 13/22] remove typos: the 't' key on my keyboard is stuck :-/ --- examples/datasets/plot_iris_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 196dfe67ba284..038b4b152aba4 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -51,8 +51,8 @@ # in the dataset, with the color indicating their respective type # (Setosa, Versicolour, and Virginica). # Just based on the the 2 dimensions used in this plot - sepal width -# and sepal length - you can already see a patttern, but there's still -# overlap. Let's apply a PCA analysis to bettter differentiatte between +# and sepal length - you can already see a pattern, but there's still +# overlap. Let's apply a PCA analysis to better differentiate between # the three types! # %% From 259a009da58ec05260b737d04626008fdf0ce239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 16:25:45 +0200 Subject: [PATCH 14/22] adapt text for PCA (intro and plot description), as suggested by Guillaume --- examples/datasets/plot_iris_dataset.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 038b4b152aba4..5c1cb6ec94e69 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -52,15 +52,14 @@ # (Setosa, Versicolour, and Virginica). 
# Just based on the the 2 dimensions used in this plot - sepal width # and sepal length - you can already see a pattern, but there's still -# overlap. Let's apply a PCA analysis to better differentiate between -# the three types! +# overlap. # %% # Plot a PCA representation # ------------------------- -# Let's apply a PCA to the iris dataset and then plot the irises -# across the first three PCA dimensions. -# This will give us a better understanding of our analysis results. +# Let's apply a Principal Component Analysis (PCA) to the iris dataset +# and then plot the irises across the first three PCA dimensions. +# This will allow us to better differentiate between the three types! from sklearn.decomposition import PCA # noqa: E402 fig = plt.figure(1, figsize=(8, 6)) @@ -84,10 +83,8 @@ ax.zaxis.set_ticklabels([]) plt.show() - -# We've now applied a PCA analysis and plotted the irises -# along the first three dimensions (= Eigenvectors). -# Looks like the first dimension already does a pretty good job -# in differentiating the types of irises! - # %% +# PCA will create 3 new features that are a linear combination of the +# 4 original features. In addition, this transform maximizes the variance. +# With this transformation, we see that we can identify each species using +# only the first feature (i.e. first eigenvalues). 
From cfe9790b379d3e964710bae2bf76ae872e8abfea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 16:28:01 +0200 Subject: [PATCH 15/22] adapt title for first cell --- examples/datasets/plot_iris_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 5c1cb6ec94e69..85d8123de7d3d 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -20,8 +20,8 @@ # License: BSD 3 clause # %% -# Setup: Import the iris dataset -# ------------------------------ +# Loading the iris dataset +# ------------------------ from sklearn import datasets iris = datasets.load_iris() From 326bfd5c6f6f72c532501e7cb0ecd0088bfc777f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 16:38:30 +0200 Subject: [PATCH 16/22] adjust scatter plot to Guillaumes suggestion --- examples/datasets/plot_iris_dataset.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 85d8123de7d3d..50ade6066e8c5 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -41,12 +41,12 @@ # Plot the training points across the 1st and 2nd feature # (sepal length and sepal width) -plt.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target) -plt.xlabel("Sepal Length") -plt.ylabel("Sepal Width") - -plt.xticks(()) -plt.yticks(()) +_, ax = plt.subplots() +scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target) +ax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1]) +_ = ax.legend( + scatter.legend_elements()[0], iris.target_names, loc="lower right", title="Classes" +) # Each point in the scatter plot refers to one of the 150 iris flowers # in the dataset, with the color indicating their respective type # (Setosa, Versicolour, 
and Virginica). From 1ff67e0fb67df48d41c9566996d406f68737b47b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 16:45:07 +0200 Subject: [PATCH 17/22] improve text for the scatter plot: now that we have added a legend, we can better describe the types --- examples/datasets/plot_iris_dataset.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 50ade6066e8c5..b714579076b74 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -50,9 +50,10 @@ # Each point in the scatter plot refers to one of the 150 iris flowers # in the dataset, with the color indicating their respective type # (Setosa, Versicolour, and Virginica). -# Just based on the the 2 dimensions used in this plot - sepal width -# and sepal length - you can already see a pattern, but there's still -# overlap. +# You can already see a pattern regarding the Setosa type, which is +# easily identifiable based on its short and wide sepal. Only +# considering these 2 dimensions, sepal width and length, there's still +# overlap between the Versicolor and Virginica types. # %% # Plot a PCA representation From db80506d516bdc6455bf657d94a246d44499ab27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Sat, 12 Aug 2023 16:53:18 +0200 Subject: [PATCH 18/22] trying to fix the failed checks: add empty line before next cell --- examples/datasets/plot_iris_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index b714579076b74..24eaf6a7ae3fd 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -84,6 +84,7 @@ ax.zaxis.set_ticklabels([]) plt.show() + # %% # PCA will create 3 new features that are a linear combination of the # 4 original features. 
In addition, this transform maximizes the variance. From 35464844a4d990ed506c6779ac1e948bf49de94f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 5 Sep 2023 21:39:00 +0200 Subject: [PATCH 19/22] add new cell for comments --- examples/datasets/plot_iris_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 24eaf6a7ae3fd..dd7235cd5913e 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -47,6 +47,8 @@ _ = ax.legend( scatter.legend_elements()[0], iris.target_names, loc="lower right", title="Classes" ) + +# %% # Each point in the scatter plot refers to one of the 150 iris flowers # in the dataset, with the color indicating their respective type # (Setosa, Versicolour, and Virginica). From d83f0e5a7cb144ac7d2e0b1d420980b6cd18bfb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 5 Sep 2023 21:39:34 +0200 Subject: [PATCH 20/22] remove the part where the plot size was specified, we don't need it --- examples/datasets/plot_iris_dataset.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index dd7235cd5913e..f6966734f104d 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -35,10 +35,6 @@ # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401, E402 -# Prepare the plot size -plt.figure(2, figsize=(8, 6)) -plt.clf() - # Plot the training points across the 1st and 2nd feature # (sepal length and sepal width) _, ax = plt.subplots() From 57b5be97d94849f3ff0443b67a6cbde33b04f035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elisabeth=20G=C3=BCnther?= Date: Tue, 5 Sep 2023 21:44:22 +0200 Subject: [PATCH 21/22] add noqa because import is not done at the top of the file! 
--- examples/datasets/plot_iris_dataset.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index f6966734f104d..3611762061ebe 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -32,11 +32,6 @@ # -------------------------------- import matplotlib.pyplot as plt # noqa: E402 -# unused but required import for doing 3d projections with matplotlib < 3.2 -import mpl_toolkits.mplot3d # noqa: F401, E402 - -# Plot the training points across the 1st and 2nd feature -# (sepal length and sepal width) _, ax = plt.subplots() scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target) ax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1]) @@ -59,6 +54,10 @@ # Let's apply a Principal Component Analysis (PCA) to the iris dataset # and then plot the irises across the first three PCA dimensions. # This will allow us to better differentiate between the three types! 
+ # unused but required import for doing 3d projections with matplotlib < 3.2 +import mpl_toolkits.mplot3d # noqa: F401, E402 + from sklearn.decomposition import PCA # noqa: E402 fig = plt.figure(1, figsize=(8, 6)) From 474e8cc7b71da25135fe34a312fcd2e48e82bea2 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 7 Sep 2023 16:03:59 +0200 Subject: [PATCH 22/22] MAINT remove unnecessary check --- examples/datasets/plot_iris_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 3611762061ebe..32aba8918547e 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -30,7 +30,7 @@ # %% # Scatter Plot of the Iris dataset # -------------------------------- -import matplotlib.pyplot as plt # noqa: E402 +import matplotlib.pyplot as plt _, ax = plt.subplots() scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target) @@ -56,9 +56,9 @@ # This will allow us to better differentiate between the three types! # unused but required import for doing 3d projections with matplotlib < 3.2 -import mpl_toolkits.mplot3d # noqa: F401, E402 +import mpl_toolkits.mplot3d # noqa: F401 -from sklearn.decomposition import PCA # noqa: E402 +from sklearn.decomposition import PCA fig = plt.figure(1, figsize=(8, 6)) ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110)