From f3f8a415b9f2d923092a64d5c96d0891cdbeccc7 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 27 Sep 2024 14:00:33 +0530 Subject: [PATCH 1/9] merge validation curve and traintest_error --- .../plot_train_error_vs_test_error.py | 42 ++++++++++++++++++ .../model_selection/plot_validation_curve.py | 43 ------------------- 2 files changed, 42 insertions(+), 43 deletions(-) delete mode 100644 examples/model_selection/plot_validation_curve.py diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index dc370383b2ef7..7b6edc2d08cad 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -86,3 +86,45 @@ plt.legend() plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26) plt.show() + +# %% +# Plotting Validation Curves +#------------------------------------------------------------- +#In this plot, you can see the training and validation scores +#of the ElasticNet model for different values of regularization +#parameter alpha. As can be inferred from the plot, for very low values +#of alpha (close to zero), the regularization is weak, meaning the model +#fits the training data very closely, leading to high training scores but lower +#validation scores. This is a case of overfitting, where the model captures +#noise in the training data rather than the underlying pattern. +# +#Using the ``ValidationCurveDisplay`` class helps by automating the plotting of training +#and validation scores across a range of alpha values, eliminating the need for +# manual iteration and plotting, and providing a clear, consistent visualization +# of model performance. + + +from sklearn.model_selection import ValidationCurveDisplay + +# Define the range of alphas (regularization strength) to explore +alphas = np.logspace(-5, 1, 60) + +# Use the ValidationCurveDisplay to automatically plot the train and test scores +disp = ValidationCurveDisplay.from_estimator( + enet, # ElasticNet model + X_train, # Training data + y_train, # Training target + param_name="alpha", # Hyperparameter to vary + param_range=alphas, # Range of alpha values + scoring="r2", # Scoring metric, R^2 in this case + n_jobs=-1, # Use all available CPUs + score_type="both", # Plot both training and test scores +) + +# Customize the display +disp.ax_.set_title("Validation Curve for ElasticNet (R^2 Score)") +disp.ax_.set_xlabel(r"alpha (regularization strength)") +disp.ax_.set_ylabel("R^2 Score") +disp.ax_.set_ylim(0.0, 1.1) + +plt.show() \ No newline at end of file diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py deleted file mode 100644 index 44a382fed0c17..0000000000000 --- a/examples/model_selection/plot_validation_curve.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -========================== -Plotting Validation Curves -========================== - -In this plot you can see the training scores and validation scores of an SVM -for different values of the kernel parameter gamma. For very low values of -gamma, you can see that both the training score and the validation score are -low. This is called underfitting. Medium values of gamma will result in high -values for both scores, i.e. the classifier is performing fairly well. If gamma -is too high, the classifier will overfit, which means that the training score -is good but the validation score is poor. 
- -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -import matplotlib.pyplot as plt -import numpy as np - -from sklearn.datasets import load_digits -from sklearn.model_selection import ValidationCurveDisplay -from sklearn.svm import SVC - -X, y = load_digits(return_X_y=True) -subset_mask = np.isin(y, [1, 2]) # binary classification: 1 vs 2 -X, y = X[subset_mask], y[subset_mask] - -disp = ValidationCurveDisplay.from_estimator( - SVC(), - X, - y, - param_name="gamma", - param_range=np.logspace(-6, -1, 5), - score_type="both", - n_jobs=2, - score_name="Accuracy", -) -disp.ax_.set_title("Validation Curve for SVM with an RBF kernel") -disp.ax_.set_xlabel(r"gamma (inverse radius of the RBF kernel)") -disp.ax_.set_ylim(0.0, 1.1) -plt.show() From 5b947b7f19d82decaa1b50fca627d78fa32f5582 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 27 Sep 2024 14:15:03 +0530 Subject: [PATCH 2/9] fix linting --- .../plot_train_error_vs_test_error.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 7b6edc2d08cad..8a24126852d35 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -89,19 +89,19 @@ # %% # Plotting Validation Curves -#------------------------------------------------------------- -#In this plot, you can see the training and validation scores -#of the ElasticNet model for different values of regularization -#parameter alpha. As can be inferred from the plot, for very low values -#of alpha (close to zero), the regularization is weak, meaning the model -#fits the training data very closely, leading to high training scores but lower -#validation scores. This is a case of overfitting, where the model captures -#noise in the training data rather than the underlying pattern. +# ------------------------------------------------------------- +# In this plot, you can see the training and validation scores +# of the ElasticNet model for different values of regularization +# parameter alpha. As can be inferred from the plot, for very low values +# of alpha (close to zero), the regularization is weak, meaning the model +# fits the training data very closely, leading to high training scores but lower +# validation scores. This is a case of overfitting, where the model captures +# noise in the training data rather than the underlying pattern. # -#Using the ``ValidationCurveDisplay`` class helps by automating the plotting of training -#and validation scores across a range of alpha values, eliminating the need for -# manual iteration and plotting, and providing a clear, consistent visualization -# of model performance. +# Using the ``ValidationCurveDisplay`` class helps by automating the plotting of +# trainingand validation scores across a range of alpha values, eliminating the +# need for manual iteration and plotting, and providing a clear, consistent +# visualization of model performance. 
from sklearn.model_selection import ValidationCurveDisplay @@ -111,13 +111,13 @@ # Use the ValidationCurveDisplay to automatically plot the train and test scores disp = ValidationCurveDisplay.from_estimator( - enet, # ElasticNet model - X_train, # Training data - y_train, # Training target - param_name="alpha", # Hyperparameter to vary - param_range=alphas, # Range of alpha values - scoring="r2", # Scoring metric, R^2 in this case - n_jobs=-1, # Use all available CPUs + enet, # ElasticNet model + X_train, # Training data + y_train, # Training target + param_name="alpha", # Hyperparameter to vary + param_range=alphas, # Range of alpha values + scoring="r2", # Scoring metric, R^2 in this case + n_jobs=-1, # Use all available CPUs score_type="both", # Plot both training and test scores ) @@ -127,4 +127,4 @@ disp.ax_.set_ylabel("R^2 Score") disp.ax_.set_ylim(0.0, 1.1) -plt.show() \ No newline at end of file +plt.show() From 30cfdb832c058819860702a73baef64037de830b Mon Sep 17 00:00:00 2001 From: saldanhad Date: Fri, 27 Sep 2024 21:35:24 +0530 Subject: [PATCH 3/9] update redirects in conf.py --- doc/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index d07926b8b27f4..c15bf3e0852f8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -447,6 +447,9 @@ def add_js_css_files(app, pagename, templatename, context, doctree): "auto_examples/model_selection/grid_search_text_feature_extraction.py": ( "auto_examples/model_selection/plot_grid_search_text_feature_extraction.py" ), + "auto_examples/model_selection/plot_validation_curve.py": ( + "auto_examples/model_selection/plot_train_error_vs_test_error.py" + ), "auto_examples/miscellaneous/plot_changed_only_pprint_parameter": ( "auto_examples/miscellaneous/plot_estimator_representation" ), From 9cff8440e38dfaf9056b696a75c7b1f4637832ab Mon Sep 17 00:00:00 2001 From: Deepak Saldanha Date: Sat, 28 Sep 2024 09:20:34 +0000 Subject: [PATCH 4/9] rename file, reflecting change --- ...rror_vs_test_error_and_validation_curve.py | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py diff --git a/examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py b/examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py new file mode 100644 index 0000000000000..8a24126852d35 --- /dev/null +++ b/examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py @@ -0,0 +1,130 @@ +""" +========================= +Train error vs Test error +========================= + +Illustration of how the performance of an estimator on unseen data (test data) +is not the same as the performance on training data. As the regularization +increases the performance on train decreases while the performance on test +is optimal within a range of values of the regularization parameter. +The example with an Elastic-Net regression model and the performance is +measured using the explained variance a.k.a. R^2. 
+ +""" + +# Authors: The scikit-learn developers +# SPDX-License-Identifier: BSD-3-Clause + +# %% +# Generate sample data +# -------------------- +import numpy as np + +from sklearn import linear_model +from sklearn.datasets import make_regression +from sklearn.model_selection import train_test_split + +n_samples_train, n_samples_test, n_features = 75, 150, 500 +X, y, coef = make_regression( + n_samples=n_samples_train + n_samples_test, + n_features=n_features, + n_informative=50, + shuffle=False, + noise=1.0, + coef=True, +) +X_train, X_test, y_train, y_test = train_test_split( + X, y, train_size=n_samples_train, test_size=n_samples_test, shuffle=False +) +# %% +# Compute train and test errors +# ----------------------------- +alphas = np.logspace(-5, 1, 60) +enet = linear_model.ElasticNet(l1_ratio=0.7, max_iter=10000) +train_errors = list() +test_errors = list() +for alpha in alphas: + enet.set_params(alpha=alpha) + enet.fit(X_train, y_train) + train_errors.append(enet.score(X_train, y_train)) + test_errors.append(enet.score(X_test, y_test)) + +i_alpha_optim = np.argmax(test_errors) +alpha_optim = alphas[i_alpha_optim] +print("Optimal regularization parameter : %s" % alpha_optim) + +# Estimate the coef_ on full data with optimal regularization parameter +enet.set_params(alpha=alpha_optim) +coef_ = enet.fit(X, y).coef_ + +# %% +# Plot results functions +# ---------------------- + +import matplotlib.pyplot as plt + +plt.subplot(2, 1, 1) +plt.semilogx(alphas, train_errors, label="Train") +plt.semilogx(alphas, test_errors, label="Test") +plt.vlines( + alpha_optim, + plt.ylim()[0], + np.max(test_errors), + color="k", + linewidth=3, + label="Optimum on test", +) +plt.legend(loc="lower right") +plt.ylim([0, 1.2]) +plt.xlabel("Regularization parameter") +plt.ylabel("Performance") + +# Show estimated coef_ vs true coef +plt.subplot(2, 1, 2) +plt.plot(coef, label="True coef") +plt.plot(coef_, label="Estimated coef") +plt.legend() +plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26) +plt.show() + +# %% +# Plotting Validation Curves +# ------------------------------------------------------------- +# In this plot, you can see the training and validation scores +# of the ElasticNet model for different values of regularization +# parameter alpha. As can be inferred from the plot, for very low values +# of alpha (close to zero), the regularization is weak, meaning the model +# fits the training data very closely, leading to high training scores but lower +# validation scores. This is a case of overfitting, where the model captures +# noise in the training data rather than the underlying pattern. +# +# Using the ``ValidationCurveDisplay`` class helps by automating the plotting of +# trainingand validation scores across a range of alpha values, eliminating the +# need for manual iteration and plotting, and providing a clear, consistent +# visualization of model performance. 
+ + +from sklearn.model_selection import ValidationCurveDisplay + +# Define the range of alphas (regularization strength) to explore +alphas = np.logspace(-5, 1, 60) + +# Use the ValidationCurveDisplay to automatically plot the train and test scores +disp = ValidationCurveDisplay.from_estimator( + enet, # ElasticNet model + X_train, # Training data + y_train, # Training target + param_name="alpha", # Hyperparameter to vary + param_range=alphas, # Range of alpha values + scoring="r2", # Scoring metric, R^2 in this case + n_jobs=-1, # Use all available CPUs + score_type="both", # Plot both training and test scores +) + +# Customize the display +disp.ax_.set_title("Validation Curve for ElasticNet (R^2 Score)") +disp.ax_.set_xlabel(r"alpha (regularization strength)") +disp.ax_.set_ylabel("R^2 Score") +disp.ax_.set_ylim(0.0, 1.1) + +plt.show() From d794f2e84d410b8355b8b1fd4eb797660a568b22 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Sun, 29 Sep 2024 02:04:58 +0530 Subject: [PATCH 5/9] update link in conf.py --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index c15bf3e0852f8..cc92fc6ccd08b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -448,7 +448,7 @@ def add_js_css_files(app, pagename, templatename, context, doctree): "auto_examples/model_selection/plot_grid_search_text_feature_extraction.py" ), "auto_examples/model_selection/plot_validation_curve.py": ( - "auto_examples/model_selection/plot_train_error_vs_test_error.py" + "auto_examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py" ), "auto_examples/miscellaneous/plot_changed_only_pprint_parameter": ( "auto_examples/miscellaneous/plot_estimator_representation" From ac8d7776cf169d071d245b1c6b8eab92908fd5f6 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Wed, 2 Oct 2024 01:21:34 +0530 Subject: [PATCH 6/9] implement changes post review --- doc/conf.py | 2 +- .../plot_train_error_vs_test_error.py | 77 +++++++++---------- 2 files changed, 36 insertions(+), 43 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 32b5dfc788f0a..f1abff0b1b4b9 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -448,7 +448,7 @@ def add_js_css_files(app, pagename, templatename, context, doctree): "auto_examples/model_selection/plot_grid_search_text_feature_extraction.py" ), "auto_examples/model_selection/plot_validation_curve.py": ( - "auto_examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py" + "auto_examples/model_selection/plot_train_error_vs_test_error.py" ), "auto_examples/datasets/plot_digits_last_image.py": ( "auto_examples/exercises/plot_digits_classification_exercises.py" diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 8a24126852d35..5abcc104cf058 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -57,36 +57,6 @@ enet.set_params(alpha=alpha_optim) coef_ = enet.fit(X, y).coef_ -# %% -# Plot results functions -# ---------------------- - -import matplotlib.pyplot as plt - -plt.subplot(2, 1, 1) -plt.semilogx(alphas, train_errors, label="Train") -plt.semilogx(alphas, test_errors, label="Test") -plt.vlines( - alpha_optim, - plt.ylim()[0], - np.max(test_errors), - color="k", - linewidth=3, - label="Optimum on test", -) -plt.legend(loc="lower right") -plt.ylim([0, 1.2]) -plt.xlabel("Regularization parameter") -plt.ylabel("Performance") - -# Show estimated coef_ vs true coef 
-plt.subplot(2, 1, 2) -plt.plot(coef, label="True coef") -plt.plot(coef_, label="Estimated coef") -plt.legend() -plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26) -plt.show() - # %% # Plotting Validation Curves # ------------------------------------------------------------- @@ -103,28 +73,51 @@ # need for manual iteration and plotting, and providing a clear, consistent # visualization of model performance. +import matplotlib.pyplot as plt from sklearn.model_selection import ValidationCurveDisplay -# Define the range of alphas (regularization strength) to explore alphas = np.logspace(-5, 1, 60) -# Use the ValidationCurveDisplay to automatically plot the train and test scores disp = ValidationCurveDisplay.from_estimator( - enet, # ElasticNet model - X_train, # Training data - y_train, # Training target - param_name="alpha", # Hyperparameter to vary - param_range=alphas, # Range of alpha values - scoring="r2", # Scoring metric, R^2 in this case - n_jobs=-1, # Use all available CPUs - score_type="both", # Plot both training and test scores + enet, + X_train, + y_train, + param_name="alpha", + param_range=alphas, + scoring="r2", + n_jobs=2, + score_type="both", ) -# Customize the display disp.ax_.set_title("Validation Curve for ElasticNet (R^2 Score)") disp.ax_.set_xlabel(r"alpha (regularization strength)") disp.ax_.set_ylabel("R^2 Score") -disp.ax_.set_ylim(0.0, 1.1) +disp.ax_.set_ylim(-1.0, 1.2) +disp.ax_.vlines( + alpha_optim, + disp.ax_.get_ylim()[0], + np.max(test_errors), + color="k", + linewidth=3, + label="Optimum on test", +) +disp.ax_.legend(loc="lower right") + +plt.show() + +# %% +# Plotting Performance Comparison Curves +# ------------------------------------------------------------- +# This plot compares the true coefficients (coef) with the estimated coefficients (coef_) +# from the model. It visually helps assess how well the model has captured the +# underlying patterns in the data. +plt.plot(coef, label="True coef") +plt.plot(coef_, label="Estimated coef") +plt.legend() +plt.title("True vs Estimated Coefficients") +plt.xlabel("Feature Index") +plt.ylabel("Coefficient Value") +plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26) plt.show() From 3217ccc62c760181417aba129d9f1a34364eea03 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Wed, 2 Oct 2024 01:22:32 +0530 Subject: [PATCH 7/9] delete duplicate file --- ...rror_vs_test_error_and_validation_curve.py | 130 ------------------ 1 file changed, 130 deletions(-) delete mode 100644 examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py diff --git a/examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py b/examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py deleted file mode 100644 index 8a24126852d35..0000000000000 --- a/examples/model_selection/plot_train_error_vs_test_error_and_validation_curve.py +++ /dev/null @@ -1,130 +0,0 @@ -""" -========================= -Train error vs Test error -========================= - -Illustration of how the performance of an estimator on unseen data (test data) -is not the same as the performance on training data. As the regularization -increases the performance on train decreases while the performance on test -is optimal within a range of values of the regularization parameter. -The example with an Elastic-Net regression model and the performance is -measured using the explained variance a.k.a. R^2. 
- -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -# %% -# Generate sample data -# -------------------- -import numpy as np - -from sklearn import linear_model -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split - -n_samples_train, n_samples_test, n_features = 75, 150, 500 -X, y, coef = make_regression( - n_samples=n_samples_train + n_samples_test, - n_features=n_features, - n_informative=50, - shuffle=False, - noise=1.0, - coef=True, -) -X_train, X_test, y_train, y_test = train_test_split( - X, y, train_size=n_samples_train, test_size=n_samples_test, shuffle=False -) -# %% -# Compute train and test errors -# ----------------------------- -alphas = np.logspace(-5, 1, 60) -enet = linear_model.ElasticNet(l1_ratio=0.7, max_iter=10000) -train_errors = list() -test_errors = list() -for alpha in alphas: - enet.set_params(alpha=alpha) - enet.fit(X_train, y_train) - train_errors.append(enet.score(X_train, y_train)) - test_errors.append(enet.score(X_test, y_test)) - -i_alpha_optim = np.argmax(test_errors) -alpha_optim = alphas[i_alpha_optim] -print("Optimal regularization parameter : %s" % alpha_optim) - -# Estimate the coef_ on full data with optimal regularization parameter -enet.set_params(alpha=alpha_optim) -coef_ = enet.fit(X, y).coef_ - -# %% -# Plot results functions -# ---------------------- - -import matplotlib.pyplot as plt - -plt.subplot(2, 1, 1) -plt.semilogx(alphas, train_errors, label="Train") -plt.semilogx(alphas, test_errors, label="Test") -plt.vlines( - alpha_optim, - plt.ylim()[0], - np.max(test_errors), - color="k", - linewidth=3, - label="Optimum on test", -) -plt.legend(loc="lower right") -plt.ylim([0, 1.2]) -plt.xlabel("Regularization parameter") -plt.ylabel("Performance") - -# Show estimated coef_ vs true coef -plt.subplot(2, 1, 2) -plt.plot(coef, label="True coef") -plt.plot(coef_, label="Estimated coef") -plt.legend() -plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26) -plt.show() - -# %% -# Plotting Validation Curves -# ------------------------------------------------------------- -# In this plot, you can see the training and validation scores -# of the ElasticNet model for different values of regularization -# parameter alpha. As can be inferred from the plot, for very low values -# of alpha (close to zero), the regularization is weak, meaning the model -# fits the training data very closely, leading to high training scores but lower -# validation scores. This is a case of overfitting, where the model captures -# noise in the training data rather than the underlying pattern. -# -# Using the ``ValidationCurveDisplay`` class helps by automating the plotting of -# trainingand validation scores across a range of alpha values, eliminating the -# need for manual iteration and plotting, and providing a clear, consistent -# visualization of model performance. 
- - -from sklearn.model_selection import ValidationCurveDisplay - -# Define the range of alphas (regularization strength) to explore -alphas = np.logspace(-5, 1, 60) - -# Use the ValidationCurveDisplay to automatically plot the train and test scores -disp = ValidationCurveDisplay.from_estimator( - enet, # ElasticNet model - X_train, # Training data - y_train, # Training target - param_name="alpha", # Hyperparameter to vary - param_range=alphas, # Range of alpha values - scoring="r2", # Scoring metric, R^2 in this case - n_jobs=-1, # Use all available CPUs - score_type="both", # Plot both training and test scores -) - -# Customize the display -disp.ax_.set_title("Validation Curve for ElasticNet (R^2 Score)") -disp.ax_.set_xlabel(r"alpha (regularization strength)") -disp.ax_.set_ylabel("R^2 Score") -disp.ax_.set_ylim(0.0, 1.1) - -plt.show() From 24c904ab22624f668efcb9b85015756f5d5c2a97 Mon Sep 17 00:00:00 2001 From: saldanhad Date: Wed, 2 Oct 2024 01:26:46 +0530 Subject: [PATCH 8/9] fix linting --- examples/model_selection/plot_train_error_vs_test_error.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 5abcc104cf058..be6ac0c4031f9 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -109,8 +109,8 @@ # %% # Plotting Performance Comparison Curves # ------------------------------------------------------------- -# This plot compares the true coefficients (coef) with the estimated coefficients (coef_) -# from the model. It visually helps assess how well the model has captured the +# This plot compares the true coefficients (coef) with the estimated coefficients +# (coef_) from the model. It visually helps assess how well the model has captured the # underlying patterns in the data. 
plt.plot(coef, label="True coef") From 5376ea22cb42f0b305ba741d7e56c62e40ccc882 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Fri, 11 Oct 2024 17:37:36 +0200 Subject: [PATCH 9/9] improve narrative of the example and improve visual by tweeking data and model --- doc/conf.py | 4 +- .../plot_train_error_vs_test_error.py | 191 +++++++++++------- 2 files changed, 119 insertions(+), 76 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 079c82422b0a7..278b588c103b5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -447,8 +447,8 @@ def add_js_css_files(app, pagename, templatename, context, doctree): "auto_examples/model_selection/grid_search_text_feature_extraction": ( "auto_examples/model_selection/plot_grid_search_text_feature_extraction" ), - "auto_examples/model_selection/plot_validation_curve.py": ( - "auto_examples/model_selection/plot_train_error_vs_test_error.py" + "auto_examples/model_selection/plot_validation_curve": ( + "auto_examples/model_selection/plot_train_error_vs_test_error" ), "auto_examples/datasets/plot_digits_last_image": ( "auto_examples/exercises/plot_digits_classification_exercises" diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index be6ac0c4031f9..a64b4ca94846e 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -1,15 +1,18 @@ """ -========================= -Train error vs Test error -========================= - -Illustration of how the performance of an estimator on unseen data (test data) -is not the same as the performance on training data. As the regularization -increases the performance on train decreases while the performance on test -is optimal within a range of values of the regularization parameter. -The example with an Elastic-Net regression model and the performance is -measured using the explained variance a.k.a. R^2. - +========================================================= +Effect of model regularization on training and test error +========================================================= + +In this example, we evaluate the impact of the regularization parameter in a +linear model called :class:`~sklearn.linear_model.ElasticNet`. To carry out this +evaluation, we use a validation curve using +:class:`~sklearn.model_selection.ValidationCurveDisplay`. This curve shows the +training and test scores of the model for different values of the regularization +parameter. + +Once we identify the optimal regularization parameter, we compare the true and +estimated coefficients of the model to determine if the model is able to recover +the coefficients from the noisy input data. """ # Authors: The scikit-learn developers @@ -18,67 +21,75 @@ # %% # Generate sample data # -------------------- -import numpy as np - -from sklearn import linear_model +# +# We generate a regression dataset that contains many features relative to the +# number of samples. However, only 10% of the features are informative. In this context, +# linear models exposing L1 penalization are commonly used to recover a sparse +# set of coefficients. 
from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split -n_samples_train, n_samples_test, n_features = 75, 150, 500 -X, y, coef = make_regression( +n_samples_train, n_samples_test, n_features = 150, 300, 500 +X, y, true_coef = make_regression( n_samples=n_samples_train + n_samples_test, n_features=n_features, n_informative=50, shuffle=False, noise=1.0, coef=True, + random_state=42, ) X_train, X_test, y_train, y_test = train_test_split( X, y, train_size=n_samples_train, test_size=n_samples_test, shuffle=False ) -# %% -# Compute train and test errors -# ----------------------------- -alphas = np.logspace(-5, 1, 60) -enet = linear_model.ElasticNet(l1_ratio=0.7, max_iter=10000) -train_errors = list() -test_errors = list() -for alpha in alphas: - enet.set_params(alpha=alpha) - enet.fit(X_train, y_train) - train_errors.append(enet.score(X_train, y_train)) - test_errors.append(enet.score(X_test, y_test)) - -i_alpha_optim = np.argmax(test_errors) -alpha_optim = alphas[i_alpha_optim] -print("Optimal regularization parameter : %s" % alpha_optim) - -# Estimate the coef_ on full data with optimal regularization parameter -enet.set_params(alpha=alpha_optim) -coef_ = enet.fit(X, y).coef_ # %% -# Plotting Validation Curves -# ------------------------------------------------------------- -# In this plot, you can see the training and validation scores -# of the ElasticNet model for different values of regularization -# parameter alpha. As can be inferred from the plot, for very low values -# of alpha (close to zero), the regularization is weak, meaning the model -# fits the training data very closely, leading to high training scores but lower -# validation scores. This is a case of overfitting, where the model captures -# noise in the training data rather than the underlying pattern. +# Model definition +# ---------------- +# +# Here, we do not use a model that only exposes an L1 penalty. Instead, we use +# an :class:`~sklearn.linear_model.ElasticNet` model that exposes both L1 and L2 +# penalties. +# +# We fix the `l1_ratio` parameter such that the solution found by the model is still +# sparse. Therefore, this type of model tries to find a sparse solution but at the same +# time also tries to shrink all coefficients towards zero. # -# Using the ``ValidationCurveDisplay`` class helps by automating the plotting of -# trainingand validation scores across a range of alpha values, eliminating the -# need for manual iteration and plotting, and providing a clear, consistent -# visualization of model performance. +# In addition, we force the coefficients of the model to be positive since we know that +# `make_regression` generates a response with a positive signal. So we use this +# pre-knowledge to get a better model. -import matplotlib.pyplot as plt +from sklearn.linear_model import ElasticNet + +enet = ElasticNet(l1_ratio=0.9, positive=True, max_iter=10_000) + + +# %% +# Evaluate the impact of the regularization parameter +# --------------------------------------------------- +# +# To evaluate the impact of the regularization parameter, we use a validation +# curve. This curve shows the training and test scores of the model for different +# values of the regularization parameter. +# +# The regularization `alpha` is a parameter applied to the coefficients of the model: +# when it tends to zero, no regularization is applied and the model tries to fit the +# training data with the least amount of error. However, it leads to overfitting when +# features are noisy. 
When `alpha` increases, the model coefficients are constrained, +# and thus the model cannot fit the training data as closely, avoiding overfitting. +# However, if too much regularization is applied, the model underfits the data and +# is not able to properly capture the signal. +# +# The validation curve helps in finding a good trade-off between both extremes: the +# model is not regularized and thus flexible enough to fit the signal, but not too +# flexible to overfit. The :class:`~sklearn.model_selection.ValidationCurveDisplay` +# allows us to display the training and validation scores across a range of alpha +# values. +import numpy as np from sklearn.model_selection import ValidationCurveDisplay alphas = np.logspace(-5, 1, 60) - disp = ValidationCurveDisplay.from_estimator( enet, X_train, @@ -89,35 +100,67 @@ n_jobs=2, score_type="both", ) +disp.ax_.set( + title=r"Validation Curve for ElasticNet (R$^2$ Score)", + xlabel=r"alpha (regularization strength)", + ylabel="R$^2$ Score", +) -disp.ax_.set_title("Validation Curve for ElasticNet (R^2 Score)") -disp.ax_.set_xlabel(r"alpha (regularization strength)") -disp.ax_.set_ylabel("R^2 Score") -disp.ax_.set_ylim(-1.0, 1.2) +test_scores_mean = disp.test_scores.mean(axis=1) +idx_avg_max_test_score = np.argmax(test_scores_mean) disp.ax_.vlines( - alpha_optim, + alphas[idx_avg_max_test_score], disp.ax_.get_ylim()[0], - np.max(test_errors), + test_scores_mean[idx_avg_max_test_score], color="k", - linewidth=3, - label="Optimum on test", + linewidth=2, + linestyle="--", + label=f"Optimum on test\n$\\alpha$ = {alphas[idx_avg_max_test_score]:.2e}", ) -disp.ax_.legend(loc="lower right") +_ = disp.ax_.legend(loc="lower right") -plt.show() +# %% +# To find the optimal regularization parameter, we can select the value of `alpha` +# that maximizes the validation score. +# +# Coefficients comparison +# ----------------------- +# +# Now that we have identified the optimal regularization parameter, we can compare the +# true coefficients and the estimated coefficients. +# +# First, let's set the regularization parameter to the optimal value and fit the +# model on the training data. In addition, we'll show the test score for this model. +enet.set_params(alpha=alphas[idx_avg_max_test_score]).fit(X_train, y_train) +print( + f"Test score: {enet.score(X_test, y_test):.3f}", +) # %% -# Plotting Performance Comparison Curves -# ------------------------------------------------------------- -# This plot compares the true coefficients (coef) with the estimated coefficients -# (coef_) from the model. It visually helps assess how well the model has captured the -# underlying patterns in the data. - -plt.plot(coef, label="True coef") -plt.plot(coef_, label="Estimated coef") -plt.legend() -plt.title("True vs Estimated Coefficients") -plt.xlabel("Feature Index") -plt.ylabel("Coefficient Value") -plt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26) +# Now, we plot the true coefficients and the estimated coefficients. +import matplotlib.pyplot as plt + +fig, axs = plt.subplots(ncols=2, figsize=(12, 6), sharex=True, sharey=True) +for ax, coef, title in zip(axs, [true_coef, enet.coef_], ["True", "Model"]): + ax.stem(coef) + ax.set( + title=f"{title} Coefficients", + xlabel="Feature Index", + ylabel="Coefficient Value", + ) +fig.suptitle( + "Comparison of the coefficients of the true generative model and \n" + "the estimated elastic net coefficients" +) + plt.show() + +# %% +# While the original coefficients are sparse, the estimated coefficients are not +# as sparse. 
The reason is that we fixed the `l1_ratio` parameter to 0.9. We could
+# force the model towards a sparser solution by increasing the `l1_ratio` parameter.
+#
+# However, we observe that for coefficients that are close to zero in the true
+# generative model, the estimated coefficients are shrunk towards zero. So we do not
+# recover the true coefficients exactly, but we get a sensible outcome in line with
+# the performance obtained on the test set.
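
The closing paragraph of the example states that increasing `l1_ratio` would push the
elastic net towards a sparser solution, but the example does not show that check. Below
is a minimal sketch of it, reusing the same data-generating setup as the example; the
fixed `alpha = 0.01` is a hypothetical placeholder, whereas the example picks the
optimum from the validation curve.

import numpy as np

from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split

# Same data-generating setup as the example: 150 train + 300 test samples,
# 500 features of which 50 are informative.
X, y, true_coef = make_regression(
    n_samples=450,
    n_features=500,
    n_informative=50,
    shuffle=False,
    noise=1.0,
    coef=True,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=150, test_size=300, shuffle=False
)

# Hypothetical fixed regularization strength; the example instead selects the
# optimum alpha from the validation curve.
alpha = 0.01
for l1_ratio in (0.9, 0.95, 1.0):
    enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, positive=True, max_iter=10_000)
    enet.fit(X_train, y_train)
    print(
        f"l1_ratio={l1_ratio}: {np.count_nonzero(enet.coef_)} non-zero coefficients, "
        f"test R^2 = {enet.score(X_test, y_test):.3f}"
    )

With `alpha` held fixed, moving `l1_ratio` towards 1 trades the L2 shrinkage for pure
L1 selection, so the number of non-zero coefficients should drop closer to the 50
informative features used by the generative model.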