ENH accept class_of_interest in DecisionBoundaryDisplay to inspect multiclass classifiers #27291

Merged (21 commits, Oct 11, 2023)
14 changes: 14 additions & 0 deletions doc/whats_new/v1.4.rst
@@ -280,6 +280,20 @@ Changelog
deprecated and will be removed in 1.6. :pr:`26830` by :user:`Stefanie Senger
<StefanieSenger>`.

:mod:`sklearn.inspection`
.........................

- |Enhancement| :class:`inspection.DecisionBoundaryDisplay` now accepts a parameter
  `class_of_interest` to select the class of interest when plotting the response
  provided by `response_method="predict_proba"` or
  `response_method="decision_function"`. It allows plotting the decision boundary
  for both binary and multiclass classifiers.
  :pr:`27291` by :user:`Guillaume Lemaitre <glemaitre>`.
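As an illustration of the new parameter (a minimal sketch, not taken from the PR; the
iris setup and variable names are only for demonstration):

    import matplotlib.pyplot as plt

    from sklearn.datasets import load_iris
    from sklearn.inspection import DecisionBoundaryDisplay
    from sklearn.linear_model import LogisticRegression

    # Fit a multiclass classifier on two features so the boundary can be plotted.
    X, y = load_iris(return_X_y=True)
    X = X[:, :2]
    clf = LogisticRegression().fit(X, y)

    # Plot the probability map of class 2 only; before this change, multiclass
    # estimators were limited to response_method="predict" or "auto".
    DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict_proba",
        class_of_interest=2,
    )
    plt.show()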

- |API| :class:`inspection.DecisionBoundaryDisplay` now raises an `AttributeError`
  instead of a `ValueError` when an estimator does not implement the requested
  response method.
  :pr:`27291` by :user:`Guillaume Lemaitre <glemaitre>`.
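A hedged illustration of the new error type (not from the PR; `SVC` without
`probability=True` is used here only because it does not expose `predict_proba`):

    from sklearn.datasets import make_classification
    from sklearn.inspection import DecisionBoundaryDisplay
    from sklearn.svm import SVC

    X, y = make_classification(n_features=2, n_redundant=0, random_state=0)
    clf = SVC().fit(X, y)  # probability=False, so predict_proba is not available

    try:
        DecisionBoundaryDisplay.from_estimator(clf, X, response_method="predict_proba")
    except AttributeError as exc:
        # In 1.4 this surfaces as an AttributeError; earlier releases raised ValueError.
        print(exc)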

:mod:`sklearn.linear_model`
...........................

63 changes: 32 additions & 31 deletions examples/classification/plot_classification_probability.py
@@ -22,10 +22,12 @@

import matplotlib.pyplot as plt
import numpy as np
+from matplotlib import cm

from sklearn import datasets
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
+from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
@@ -56,40 +58,39 @@

n_classifiers = len(classifiers)

-plt.figure(figsize=(3 * 2, n_classifiers * 2))
-plt.subplots_adjust(bottom=0.2, top=0.95)
-
-xx = np.linspace(3, 9, 100)
-yy = np.linspace(1, 5, 100).T
-xx, yy = np.meshgrid(xx, yy)
-Xfull = np.c_[xx.ravel(), yy.ravel()]
-
-for index, (name, classifier) in enumerate(classifiers.items()):
-    classifier.fit(X, y)
-
-    y_pred = classifier.predict(X)
+fig, axes = plt.subplots(
+    nrows=n_classifiers,
+    ncols=len(iris.target_names),
+    figsize=(3 * 2, n_classifiers * 2),
+)
+for classifier_idx, (name, classifier) in enumerate(classifiers.items()):
+    y_pred = classifier.fit(X, y).predict(X)
    accuracy = accuracy_score(y, y_pred)
-    print("Accuracy (train) for %s: %0.1f%% " % (name, accuracy * 100))
-
-    # View probabilities:
-    probas = classifier.predict_proba(Xfull)
-    n_classes = np.unique(y_pred).size
-    for k in range(n_classes):
-        plt.subplot(n_classifiers, n_classes, index * n_classes + k + 1)
-        plt.title("Class %d" % k)
-        if k == 0:
-            plt.ylabel(name)
-        imshow_handle = plt.imshow(
-            probas[:, k].reshape((100, 100)), extent=(3, 9, 1, 5), origin="lower"
-        )
-        plt.xticks(())
-        plt.yticks(())
-        idx = y_pred == k
-        if idx.any():
-            plt.scatter(X[idx, 0], X[idx, 1], marker="o", c="w", edgecolor="k")
+    print(f"Accuracy (train) for {name}: {accuracy:0.1%}")
+    for label in np.unique(y):
+        # plot the probability estimate provided by the classifier
+        disp = DecisionBoundaryDisplay.from_estimator(
+            classifier,
+            X,
+            response_method="predict_proba",
+            class_of_interest=label,
+            ax=axes[classifier_idx, label],
+            vmin=0,
+            vmax=1,
+        )
+        axes[classifier_idx, label].set_title(f"Class {label}")
+        # plot data predicted to belong to given class
+        mask_y_pred = y_pred == label
+        axes[classifier_idx, label].scatter(
+            X[mask_y_pred, 0], X[mask_y_pred, 1], marker="o", c="w", edgecolor="k"
+        )
+        axes[classifier_idx, label].set(xticks=(), yticks=())
+    axes[classifier_idx, 0].set_ylabel(name)

-ax = plt.axes([0.15, 0.04, 0.7, 0.05])
+ax = plt.axes([0.15, 0.04, 0.7, 0.02])
plt.title("Probability")
-plt.colorbar(imshow_handle, cax=ax, orientation="horizontal")
+_ = plt.colorbar(
+    cm.ScalarMappable(norm=None, cmap="viridis"), cax=ax, orientation="horizontal"
+)

plt.show()
82 changes: 57 additions & 25 deletions sklearn/inspection/_plot/decision_boundary.py
@@ -1,19 +1,18 @@
-from functools import reduce
-
import numpy as np

from ...base import is_regressor
from ...preprocessing import LabelEncoder
from ...utils import _safe_indexing, check_matplotlib_support
+from ...utils._response import _get_response_values
from ...utils.validation import (
    _is_arraylike_not_scalar,
    _num_features,
    check_is_fitted,
)


-def _check_boundary_response_method(estimator, response_method):
-    """Return prediction method from the `response_method` for decision boundary.
+def _check_boundary_response_method(estimator, response_method, class_of_interest):
+    """Validate the response methods to be used with the fitted estimator.

    Parameters
    ----------
@@ -26,36 +25,38 @@ def _check_boundary_response_method(estimator, response_method):
        If set to 'auto', the response method is tried in the following order:
        :term:`decision_function`, :term:`predict_proba`, :term:`predict`.

+    class_of_interest : int, float, bool, str or None
+        The class considered when plotting the decision. If the label is specified, it
+        is then possible to plot the decision boundary in multiclass settings.
+
+        .. versionadded:: 1.4
+
    Returns
    -------
-    prediction_method: callable
-        Prediction method of estimator.
+    prediction_method : list of str or str
+        The name or list of names of the response methods to use.
    """
    has_classes = hasattr(estimator, "classes_")
    if has_classes and _is_arraylike_not_scalar(estimator.classes_[0]):
        msg = "Multi-label and multi-output multi-class classifiers are not supported"
        raise ValueError(msg)

    if has_classes and len(estimator.classes_) > 2:
-        if response_method not in {"auto", "predict"}:
+        if response_method not in {"auto", "predict"} and class_of_interest is None:
            msg = (
-                "Multiclass classifiers are only supported when response_method is"
-                " 'predict' or 'auto'"
+                "Multiclass classifiers are only supported when `response_method` is "
+                "'predict' or 'auto'. Else you must provide `class_of_interest` to "
+                "plot the decision boundary of a specific class."
            )
            raise ValueError(msg)
-        methods_list = ["predict"]
+        prediction_method = "predict" if response_method == "auto" else response_method
    elif response_method == "auto":
-        methods_list = ["decision_function", "predict_proba", "predict"]
+        if is_regressor(estimator):
+            prediction_method = "predict"
+        else:
+            prediction_method = ["decision_function", "predict_proba", "predict"]
    else:
-        methods_list = [response_method]
-
-    prediction_method = [getattr(estimator, method, None) for method in methods_list]
-    prediction_method = reduce(lambda x, y: x or y, prediction_method)
-    if prediction_method is None:
-        raise ValueError(
-            f"{estimator.__class__.__name__} has none of the following attributes: "
-            f"{', '.join(methods_list)}."
-        )
+        prediction_method = response_method

    return prediction_method
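For context only (not part of the diff), the validation above translates to the
following behavior of the public API for a three-class problem; the setup below is
illustrative:

    from sklearn.datasets import load_iris
    from sklearn.inspection import DecisionBoundaryDisplay
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    X = X[:, :2]
    clf = LogisticRegression().fit(X, y)  # three classes

    # "auto" falls back to "predict" for multiclass classifiers.
    DecisionBoundaryDisplay.from_estimator(clf, X, response_method="auto")

    # Requesting probabilities without a class of interest hits the ValueError above.
    try:
        DecisionBoundaryDisplay.from_estimator(clf, X, response_method="predict_proba")
    except ValueError as exc:
        print(exc)

    # Providing class_of_interest makes the multiclass case valid.
    DecisionBoundaryDisplay.from_estimator(
        clf, X, response_method="predict_proba", class_of_interest=1
    )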

@@ -206,6 +207,7 @@ def from_estimator(
        eps=1.0,
        plot_method="contourf",
        response_method="auto",
+        class_of_interest=None,
        xlabel=None,
        ylabel=None,
        ax=None,
@@ -248,6 +250,14 @@
            For multiclass problems, :term:`predict` is selected when
            `response_method="auto"`.

+        class_of_interest : int, float, bool or str, default=None
[Review comment] Member:
I'm not sure what float and bool here mean? Do we accept floats for classification
targets? Is bool accepted when the target is boolean?

[Review comment] Member (Author):
> do we accept floats for classification targets?

(Unfortunately) yes, we do. The types reported here are the same as the ones accepted
as `pos_label` in the thresholded metrics.
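To make the point above concrete, a small sketch (not part of the PR):
`class_of_interest` is matched against `estimator.classes_`, so a boolean (or float)
label is accepted whenever the target uses that type, just like `pos_label` in the
thresholded metrics:

    import numpy as np

    from sklearn.inspection import DecisionBoundaryDisplay
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 2))
    y = X[:, 0] + X[:, 1] > 0  # boolean target, so classes_ is [False, True]

    clf = LogisticRegression().fit(X, y)
    DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        response_method="predict_proba",
        class_of_interest=True,  # boolean label, analogous to pos_label in metrics
    )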

+            The class considered when plotting the decision. If None,
+            `estimator.classes_[1]` is considered as the positive class
+            for binary classifiers. For multiclass classifiers, passing
+            an explicit value for `class_of_interest` is mandatory.
+
+            .. versionadded:: 1.4

        xlabel : str, default=None
            The label used for the x-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
@@ -342,11 +352,30 @@ def from_estimator(
        else:
            X_grid = np.c_[xx0.ravel(), xx1.ravel()]

-        pred_func = _check_boundary_response_method(estimator, response_method)
-        response = pred_func(X_grid)
+        prediction_method = _check_boundary_response_method(
+            estimator, response_method, class_of_interest
+        )
+        try:
+            response, _, response_method_used = _get_response_values(
+                estimator,
+                X_grid,
+                response_method=prediction_method,
+                pos_label=class_of_interest,
+                return_response_method_used=True,
+            )
+        except ValueError as exc:
+            if "is not a valid label" in str(exc):
+                # re-raise a more informative error message since `pos_label` is unknown
+                # to our user when interacting with
+                # `DecisionBoundaryDisplay.from_estimator`
+                raise ValueError(
+                    f"class_of_interest={class_of_interest} is not a valid label: It "
+                    f"should be one of {estimator.classes_}"
+                ) from exc
+            raise

        # convert classes predictions into integers
-        if pred_func.__name__ == "predict" and hasattr(estimator, "classes_"):
+        if response_method_used == "predict" and hasattr(estimator, "classes_"):
            encoder = LabelEncoder()
            encoder.classes_ = estimator.classes_
            response = encoder.transform(response)
@@ -355,8 +384,11 @@
            if is_regressor(estimator):
                raise ValueError("Multi-output regressors are not supported")

-            # TODO: Support pos_label
-            response = response[:, 1]
+            # For the multiclass case, `_get_response_values` returns the response
+            # as-is. Thus, we have a column per class and we need to select the column
+            # corresponding to the positive class.
+            col_idx = np.flatnonzero(estimator.classes_ == class_of_interest)[0]
+            response = response[:, col_idx]

        if xlabel is None:
            xlabel = X.columns[0] if hasattr(X, "columns") else ""