
DOC Notebook style and enhanced descriptions and add example links for feature_selection.RFE #26950


Closed
181 changes: 172 additions & 9 deletions examples/feature_selection/plot_rfe_digits.py
@@ -3,34 +3,197 @@
Recursive feature elimination
=============================

This example demonstrates how :class:`~sklearn.feature_selection.RFE` can be used
to determine the importance of individual pixels when classifying handwritten digits.
RFE is a method that recursively removes the least significant features and retrains
the model, allowing us to rank features by their importance.

.. note::

See also :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`

""" # noqa: E501

# %%
# Dataset
# -------
#
# We start by loading the handwritten digits dataset. This dataset consists of 8x8
# pixel images of handwritten digits. Each pixel is treated as a feature and we
# aim to determine which pixels are most relevant for the digit classification task.

# %%
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

# Load the digits dataset
digits = load_digits()
X = digits.images.reshape((len(digits.images), -1))
y = digits.target

# Display the first digit
plt.imshow(digits.images[0], cmap="gray")
plt.title(f"Label: {digits.target[0]}")
plt.axis("off")
plt.show()
Comment on lines +35 to +39 (Member):

This part is redundant with the Digit Dataset example.

# %%
# Splitting the dataset for evaluation
# ------------------------------------
#
# To assess the benefits of feature selection with
# :class:`~sklearn.feature_selection.RFE`, we need a training set for selecting
# features and training our model, and a test set for evaluation.
# We'll allocate 70% of the data for training and 30% for testing.

# %%
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# %%
# Benchmarking SVM without Feature Selection
# ------------------------------------------
#
# Before applying :class:`~sklearn.feature_selection.RFE`, let's benchmark the
# performance of a :class:`~sklearn.svm.SVC` using all features. This will give us
# a baseline accuracy to compare against.

# %%
from sklearn.metrics import accuracy_score

# Train a baseline SVC on all 64 pixel features
svc = SVC(kernel="linear", C=1)
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
accuracy_all_features = accuracy_score(y_test, y_pred)

print(f"Accuracy using all {X_train.shape[1]} features: {accuracy_all_features:.4f}")

# %%
# Feature Selection with RFE
# --------------------------
#
# Now, we'll employ :class:`~sklearn.feature_selection.RFE` to select a subset of
# the most discriminative features. The goal is to determine if a reduced set of
# important features can either maintain or even improve the classifier's performance.

# %%
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Define the parameters for the grid search
param_grid = {"rfe__n_features_to_select": [1, 5, 10, 20, 30, 40, 50, 64]}

# Create a pipeline with feature selection followed by SVM
pipe = Pipeline(
    [
        ("rfe", RFE(estimator=SVC(kernel="linear", C=1))),
        ("svc", SVC(kernel="linear", C=1)),
    ]
)

# Create the grid search object
grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring="accuracy", n_jobs=-1)

# Fit to the data and get the best estimator
grid_search.fit(X_train, y_train)
best_pipeline = grid_search.best_estimator_

# Extract the optimal number of features from the best estimator
optimal_num_features = best_pipeline.named_steps["rfe"].n_features_

print(f"Optimal number of features: {optimal_num_features}")
Comment on lines +85 to +110 (Member):

This whole part could be done more easily using the class RFECV, which is an optimized version of a grid search.

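As a minimal sketch of this suggestion (the subsample size, `step`, and `cv` values here are illustrative choices, not part of the PR):

```python
from sklearn.datasets import load_digits
from sklearn.feature_selection import RFECV
from sklearn.svm import SVC

digits = load_digits()
X = digits.images.reshape((len(digits.images), -1))[:500]  # subsample for speed
y = digits.target[:500]

# RFECV cross-validates every candidate feature count in one fit,
# replacing the manual Pipeline + GridSearchCV combination.
rfecv = RFECV(estimator=SVC(kernel="linear", C=1), step=5, cv=3, scoring="accuracy")
rfecv.fit(X, y)

print("Optimal number of features:", rfecv.n_features_)
```

RFECV keeps the selected-feature mask in `support_` and the per-candidate scores in `cv_results_`, so no separate grid search over `n_features_to_select` is needed.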

# %%
# Evaluating SVM on Selected Features
# -----------------------------------
#
# With the top features selected by :class:`~sklearn.feature_selection.RFE`, let's
# train a new :class:`~sklearn.svm.SVC` and assess its performance. The idea is to
# observe if there's any significant change in accuracy, ideally aiming for improvement.

# %%
y_pred_rfe = grid_search.predict(X_test)

# Get accuracy of model using selected features
accuracy_selected_features = accuracy_score(y_test, y_pred_rfe)

# Get the number of selected features
selected_features = best_pipeline.named_steps["rfe"].support_
num_features_to_select = selected_features.sum()

print(
    f"Accuracy using {num_features_to_select} selected features:"
    f" {accuracy_selected_features:.4f}"
)

# %%
# Visualizing Feature Importance after RFE
# ----------------------------------------
#
# :class:`~sklearn.feature_selection.RFE` provides a ranking of the features based on
# their importance. We can visualize this ranking to gain insights into which pixels
# (or features) are deemed most significant by :class:`~sklearn.feature_selection.RFE`
# in the digit classification task.

# %%
ranking = best_pipeline.named_steps["rfe"].ranking_.reshape(digits.images[0].shape)
plt.matshow(ranking, cmap=plt.cm.Blues)
plt.colorbar()
plt.title("Ranking of pixels with RFE")
plt.show()
Comment on lines +136 to 149 (Member):

According to the documentation, ranking_ is an array where the most important features are assigned rank 1, and the higher the rank, the less important the feature. Notice that in the current version of the example the shades of blue go from 1 to 64 (we have 8 x 8 pixels), whereas your code uses a model which has already truncated the feature space to keep only the 5 x 5 most relevant pixels, degenerating the rest to a value of 1. I am afraid this was not the spirit of the example.

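A small sketch of the distinction this comment draws (subsampled for speed; the feature counts are illustrative):

```python
from sklearn.datasets import load_digits
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

digits = load_digits()
X = digits.images.reshape((len(digits.images), -1))[:500]  # subsample for speed
y = digits.target[:500]

# Eliminating down to a single feature yields a full ranking: each of the
# 64 pixels gets a distinct rank, from 1 (most important) to 64 (least).
full_rfe = RFE(estimator=SVC(kernel="linear", C=1), n_features_to_select=1, step=1)
full_rfe.fit(X, y)
print(sorted(set(full_rfe.ranking_)))  # ranks 1 through 64

# Stopping early instead assigns rank 1 to *every* surviving feature,
# which flattens the heatmap over all kept pixels.
truncated_rfe = RFE(estimator=SVC(kernel="linear", C=1), n_features_to_select=25, step=1)
truncated_rfe.fit(X, y)
print((truncated_rfe.ranking_ == 1).sum())  # 25 kept features share rank 1
```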

# %%
# Feature Selection Impact on Model Accuracy
# ---------------------------------------------------
#
# To understand the relationship between the number of features selected and model
# performance, let's train the :class:`~sklearn.svm.SVC` on various subsets of
# features ranked by :class:`~sklearn.feature_selection.RFE`. We'll then plot the
# accuracy of the model as a function of the number of features used. This will help
# us visualize any trade-offs between feature selection and model accuracy.

# %%
import numpy as np

# Split the dataset (same split as earlier)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Train with RFE to get the rankings (as done earlier in the code)
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(X_train, y_train)
ranking = rfe.ranking_

# Feature counts to evaluate (adjust for finer granularity)
num_features_list = [1, 5, 10, 20, 30, 40, 50, 64]
accuracies = []

for num_features in num_features_list:
    # Select the top 'num_features' most important features
    top_features_idx = np.where(ranking <= num_features)[0]
    X_train_selected = X_train[:, top_features_idx]
    X_test_selected = X_test[:, top_features_idx]

    # Train an SVM on the selected features and record its accuracy
    svc_selected = SVC(kernel="linear", C=1)
    svc_selected.fit(X_train_selected, y_train)
    y_pred = svc_selected.predict(X_test_selected)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies.append(accuracy)

# Plot the accuracies
plt.plot(num_features_list, accuracies, marker="o", linestyle="-")
plt.xlabel("Number of Selected Features")
plt.ylabel("Accuracy")
plt.title("Feature Selection Impact on Model Accuracy")
plt.grid(True)
plt.show()
Comment on lines +152 to +199 (@ArturoAmorQ, Member, Sep 28, 2023):

This part is redundant with the RFECV example, where an interpretation and error bars are given.

2 changes: 2 additions & 0 deletions sklearn/feature_selection/_rfe.py
@@ -72,6 +72,8 @@ class RFE(_RoutingNotSupportedMixin, SelectorMixin, MetaEstimatorMixin, BaseEsti
That procedure is recursively repeated on the pruned set until the desired
number of features to select is eventually reached.

For an example on usage, see
:ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_digits.py`.
Comment on lines +75 to +76 (Member):

I'm not sure if we always want to link examples from the docstrings (unless really relevant to the introductory paragraph), as they already appear in the lowest part of the page.

Comment (Member):

I would say that we don't need it when we have a single example, which is the case here.

Read more in the :ref:`User Guide <rfe>`.

Parameters