scikit-learn · qinhanmin2014 · Aug 8, 2019 · Jul 5, 2019 · Jul 10, 2019 · Jul 10, 2019
diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst
@@ -1647,3 +1647,54 @@ make this task easier and faster (in no particular order).
     <https://git-scm.com/docs/git-grep#_examples>`_) is also extremely
     useful to see every occurrence of a pattern (e.g. a function call or a
     variable) in the code base.
+
+
+.. _plotting_api:
+
+Plotting API
+============
+
+Scikit-learn defines a simple API for creating visualizations for machine
+learning. The key feature of this API is to run calculations once and to have
+the flexibility to adjust the visualizations after the fact. This logic is
+encapsulated into a display object where the computed data is stored and
+the plotting is done in a `plot` method. The display object's `__init__`
+method contains only the data needed to create the visualization. The `plot`
+method takes in parameters that only have to do with visualization, such as a
+matplotlib axes. The `plot` method will store the matplotlib artists as
+attributes allowing for style adjustments through the display object. A
+`plot_*` helper function accepts parameters to do the computation and the
+parameters used for plotting. After the helper function creates the display
+object with the computed values, it calls the display's plot method. Note
+that the `plot` method defines attributes related to matplotlib, such as the
+line artist. This allows for customizations after calling the `plot` method.
+
+For example, the `RocCurveDisplay` defines the following methods and
+attributes:
+
+.. code-block:: python
+
+   class RocCurveDisplay:
+       def __init__(self, fpr, tpr, roc_auc, estimator_name):
+           ...
+           self.fpr = fpr
+           self.tpr = tpr
+           self.roc_auc = roc_auc
+           self.estimator_name = estimator_name
+
+       def plot(self, ax=None, name=None, **kwargs):
+           ...
+           self.line_ = ...
+           self.ax_ = ax
+           self.figure_ = ax.figure
+
+   def plot_roc_curve(estimator, X, y, pos_label=None, sample_weight=None,
+                      drop_intermediate=True, response_method="auto",
+                      name=None, ax=None, **kwargs):
+       # do computation
+       viz = RocCurveDisplay(fpr, tpr, roc_auc,
+                             estimator.__class__.__name__)
+       return viz.plot(ax=ax, name=name, **kwargs)
+
+
+Read more in the :ref:`User Guide <visualizations>`.
diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
@@ -1007,6 +1007,26 @@ See the :ref:`metrics` section of the user guide for further details.
    metrics.pairwise_distances_chunked
 
 
+Plotting
+--------
+
+See the :ref:`visualizations` section of the user guide for further details.
+
+.. currentmodule:: sklearn
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   metrics.plot_roc_curve
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   metrics.RocCurveDisplay
+
+
 .. _mixture_ref:
 
 :mod:`sklearn.mixture`: Gaussian Mixture Models

diff --git a/doc/user_guide.rst b/doc/user_guide.rst
@@ -19,6 +19,7 @@ User Guide
    unsupervised_learning.rst
    model_selection.rst
    inspection.rst
+   visualizations.rst
    data_transforms.rst
    Dataset loading utilities <datasets/index.rst>
    modules/computing.rst
diff --git a/doc/visualizations.rst b/doc/visualizations.rst
@@ -0,0 +1,83 @@
+.. include:: includes/big_toc_css.rst
+
+.. _visualizations:
+
+==============
+Visualizations
+==============
+
+Scikit-learn defines a simple API for creating visualizations for machine
+learning. The key feature of this API is to allow for quick plotting and
+visual adjustments without recalculation. In the following example, we plot a
+ROC curve for a fitted support vector machine:
+
+.. code-block:: python
+
+    from sklearn.model_selection import train_test_split
+    from sklearn.svm import SVC
+    from sklearn.metrics import plot_roc_curve
+    from sklearn.datasets import load_wine
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+    svc = SVC(random_state=42)
+    svc.fit(X_train, y_train)
+
+    svc_disp = plot_roc_curve(svc, X_test, y_test)
+
+.. figure:: ../auto_examples/images/sphx_glr_plot_roc_curve_visualization_api_001.png
+    :target: ../auto_examples/plot_roc_curve_visualization_api.html
+    :align: center
+    :scale: 75%
+
+The returned `svc_disp` object allows us to continue using the already computed
+ROC curve for SVC in future plots. In this case, the `svc_disp` is a
+:class:`~sklearn.metrics.RocCurveDisplay` that stores the computed values as
+attributes called `roc_auc`, `fpr`, and `tpr`. Next, we train a random forest
+classifier and plot the previously computed ROC curve again by using the `plot`
+method of the `Display` object.
+
+.. code-block:: python
+
+    import matplotlib.pyplot as plt
+    from sklearn.ensemble import RandomForestClassifier
+
+    rfc = RandomForestClassifier(random_state=42)
+    rfc.fit(X_train, y_train)
+
+    ax = plt.gca()
+    rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=ax, alpha=0.8)
+    svc_disp.plot(ax=ax, alpha=0.8)
+
+.. figure:: ../auto_examples/images/sphx_glr_plot_roc_curve_visualization_api_002.png
+    :target: ../auto_examples/plot_roc_curve_visualization_api.html
+    :align: center
+    :scale: 75%
+
+Notice that we pass `alpha=0.8` to the plot functions to adjust the alpha
+values of the curves.
+
+.. topic:: Examples:
+
+    * :ref:`sphx_glr_auto_examples_plot_roc_curve_visualization_api.py`
+
+Available Plotting Utilities
+============================
+
+Functions
+---------
+
+.. currentmodule:: sklearn
+
+.. autosummary::
+
+   metrics.plot_roc_curve
+
+
+Display Objects
+---------------
+
+.. currentmodule:: sklearn
+
+.. autosummary::
+
+   metrics.RocCurveDisplay
diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py
@@ -36,7 +36,8 @@
 import matplotlib.pyplot as plt
 
 from sklearn import svm, datasets
-from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import auc
+from sklearn.metrics import plot_roc_curve
 from sklearn.model_selection import StratifiedKFold
 
 # #############################################################################
@@ -65,40 +66,35 @@
 aucs = []
 mean_fpr = np.linspace(0, 1, 100)
 
-i = 0
-for train, test in cv.split(X, y):
-    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
-    # Compute ROC curve and area the curve
-    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
-    tprs.append(interp(mean_fpr, fpr, tpr))
-    tprs[-1][0] = 0.0
-    roc_auc = auc(fpr, tpr)
-    aucs.append(roc_auc)
-    plt.plot(fpr, tpr, lw=1, alpha=0.3,
-             label='ROC fold %d (AUC = %0.2f)' % (i, roc_auc))
-
-    i += 1
-plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
-         label='Chance', alpha=.8)
+fig, ax = plt.subplots()
+for i, (train, test) in enumerate(cv.split(X, y)):
+    classifier.fit(X[train], y[train])
+    viz = plot_roc_curve(classifier, X[test], y[test],
+                         name='ROC fold {}'.format(i),
+                         alpha=0.3, lw=1, ax=ax)
+    interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr)
+    interp_tpr[0] = 0.0
+    tprs.append(interp_tpr)
+    aucs.append(viz.roc_auc)
+
+ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
+        label='Chance', alpha=.8)
 
 mean_tpr = np.mean(tprs, axis=0)
 mean_tpr[-1] = 1.0
 mean_auc = auc(mean_fpr, mean_tpr)
 std_auc = np.std(aucs)
-plt.plot(mean_fpr, mean_tpr, color='b',
-         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
-         lw=2, alpha=.8)
+ax.plot(mean_fpr, mean_tpr, color='b',
+        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
+        lw=2, alpha=.8)
 
 std_tpr = np.std(tprs, axis=0)
 tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
 tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
-plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
-                 label=r'$\pm$ 1 std. dev.')
-
-plt.xlim([-0.05, 1.05])
-plt.ylim([-0.05, 1.05])
-plt.xlabel('False Positive Rate')
-plt.ylabel('True Positive Rate')
-plt.title('Receiver operating characteristic example')
-plt.legend(loc="lower right")
+ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
+                label=r'$\pm$ 1 std. dev.')
+
+ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
+       title="Receiver operating characteristic example")
+ax.legend(loc="lower right")
 plt.show()
diff --git a/examples/plot_roc_curve_visualization_api.py b/examples/plot_roc_curve_visualization_api.py
@@ -0,0 +1,55 @@
+"""
+================================
+ROC Curve with Visualization API
+================================
+Scikit-learn defines a simple API for creating visualizations for machine
+learning. The key feature of this API is to allow for quick plotting and
+visual adjustments without recalculation. In this example, we will demonstrate
+how to use the visualization API by comparing ROC curves.
+"""
+print(__doc__)
+
+##############################################################################
+# Load Data and Train a SVC
+# -------------------------
+# First, we load the wine dataset and convert it to a binary classification
+# problem. Then, we train a support vector classifier on a training dataset.
+import matplotlib.pyplot as plt
+from sklearn.svm import SVC
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import plot_roc_curve
+from sklearn.datasets import load_wine
+from sklearn.model_selection import train_test_split
+
+X, y = load_wine(return_X_y=True)
+y = y == 2
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+svc = SVC(random_state=42)
+svc.fit(X_train, y_train)
+
+##############################################################################
+# Plotting the ROC Curve
+# ----------------------
+# Next, we plot the ROC curve with a single call to
+# :func:`sklearn.metrics.plot_roc_curve`. The returned `svc_disp` object allows
+# us to continue using the already computed ROC curve for the SVC in future
+# plots.
+svc_disp = plot_roc_curve(svc, X_test, y_test)
+plt.show()
+
+##############################################################################
+# Training a Random Forest and Plotting the ROC Curve
+# --------------------------------------------------------
+# We train a random forest classifier and create a plot comparing it to the SVC
+# ROC curve. Notice how `svc_disp` uses
+# :func:`~sklearn.metrics.RocCurveDisplay.plot` to plot the SVC ROC curve
+# without recomputing the values of the ROC curve itself. Furthermore, we
+# pass `alpha=0.8` to the plot functions to adjust the alpha values of the
+# curves.
+rfc = RandomForestClassifier(n_estimators=10, random_state=42)
+rfc.fit(X_train, y_train)
+ax = plt.gca()
+rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=ax, alpha=0.8)
+svc_disp.plot(ax=ax, alpha=0.8)
+plt.show()
diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
@@ -74,6 +74,10 @@
 from .scorer import SCORERS
 from .scorer import get_scorer
 
+from ._plot.roc_curve import plot_roc_curve
+from ._plot.roc_curve import RocCurveDisplay
+
+
 __all__ = [
     'accuracy_score',
     'adjusted_mutual_info_score',
@@ -125,11 +129,13 @@
     'pairwise_distances_argmin_min',
     'pairwise_distances_chunked',
     'pairwise_kernels',
+    'plot_roc_curve',
     'precision_recall_curve',
     'precision_recall_fscore_support',
     'precision_score',
     'r2_score',
     'recall_score',
+    'RocCurveDisplay',
     'roc_auc_score',
     'roc_curve',
     'SCORERS',

diff --git a/sklearn/metrics/_plot/__init__.py b/sklearn/metrics/_plot/__init__.py