"""
======================================================
Use MAPIE to plot prediction sets
======================================================

In this example, we explain how to use MAPIE in a basic classification setting.
"""

##############################################################################
# We will use MAPIE to estimate prediction sets on a two-dimensional dataset with
# three labels.

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_blobs
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
from mapie.utils import train_conformalize_test_split
from mapie.classification import SplitConformalClassifier
from mapie.metrics.classification import classification_coverage_score

np.random.seed(42)

##############################################################################
# Firstly, let us create our dataset:

X, y = make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=3.4)

(X_train, X_conformalize, X_test,
 y_train, y_conformalize, y_test) = train_conformalize_test_split(
    X, y, train_size=0.4, conformalize_size=0.4, test_size=0.2
)

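##############################################################################
# As a quick sanity check (a small addition to the original example), we can
# verify the split proportions: with 500 samples, a 40/40/20 split should yield
# 200 training, 200 conformalization and 100 test points.

print(X_train.shape, X_conformalize.shape, X_test.shape)
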
##############################################################################
# We fit a KNN estimator on our training data.
# Then, we initialize a :class:`~mapie.classification.SplitConformalClassifier`
# with our estimator, indicating via ``prefit=True`` that it has already been
# fitted.
# Lastly, we compute the prediction sets at the desired confidence level using
# the ``conformalize`` and ``predict_set`` methods.

classifier = KNeighborsClassifier(n_neighbors=10)
classifier.fit(X_train, y_train)

confidence_level = 0.95
mapie_classifier = SplitConformalClassifier(
    estimator=classifier, confidence_level=confidence_level, prefit=True
)
mapie_classifier.conformalize(X_conformalize, y_conformalize)
y_pred, y_pred_set = mapie_classifier.predict_set(X_test)

##############################################################################
# ``y_pred`` holds the point predictions as a ``np.ndarray`` of shape
# ``(n_samples,)``.
# ``y_pred_set`` holds the prediction sets as a ``np.ndarray`` of shape
# ``(n_samples, 3, 1)``: one entry per sample, per label, and per confidence
# level (a single one here). The array contains only booleans: ``True`` if the
# label is included in the prediction set, and ``False`` otherwise.

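##############################################################################
# To make this concrete, here is one way (an illustrative addition to the
# original example) to turn the boolean masks into explicit label lists for the
# first few test points, assuming the classes are labeled ``0``, ``1`` and
# ``2`` as returned by ``make_blobs``:

labels = np.arange(3)
for i in range(3):
    # ``y_pred_set[i, :, 0]`` is the boolean mask of sample ``i`` at our single
    # confidence level; boolean indexing recovers the labels it contains.
    print(f"Point prediction: {y_pred[i]}, "
          f"prediction set: {labels[y_pred_set[i, :, 0]].tolist()}")
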
##############################################################################
# Finally, we can easily compute the coverage score (i.e., the proportion of
# times the true labels fall within the predicted sets).

coverage_score = classification_coverage_score(y_test, y_pred_set)
print(f"For a confidence level of {confidence_level:.2f}, "
      f"the target coverage is {confidence_level:.3f}, "
      f"and the effective coverage is {coverage_score[0]:.3f}.")

##############################################################################
# In this example, the effective coverage is slightly above the target coverage
# (i.e., 0.95), indicating that the confidence level we set has been reached.
# Therefore, we can confirm that the prediction sets do contain the true label
# more than 95% of the time.

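##############################################################################
# Coverage alone can be misleading: always predicting all three labels would
# reach any target coverage. As a complementary check (an addition to the
# original example), we can also look at the average size of the prediction
# sets:

# Summing the booleans over the label axis gives each set's size; we then
# average over samples at our single confidence level.
average_set_size = y_pred_set.sum(axis=1).mean()
print(f"Average prediction set size: {average_set_size:.2f}")
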
##############################################################################
# Now, let us plot the confidence regions across the plane.
# This plot will give us insight into what the prediction set looks like at
# each point of the plane.

x_min, x_max = np.min(X[:, 0]), np.max(X[:, 0])
y_min, y_max = np.min(X[:, 1]), np.max(X[:, 1])
step = 0.1

xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
X_test_mesh = np.stack([xx.ravel(), yy.ravel()], axis=1)

# Keep only the prediction sets (index 1) and drop the confidence-level axis.
y_pred_set = mapie_classifier.predict_set(X_test_mesh)[1][:, :, 0]

# Background colormap: one color per kind of prediction set, with single-label
# zones in green, orange and blue, and purple for multi-label zones.
cmap_back = ListedColormap(
    [(0.7803921568627451, 0.9137254901960784, 0.7529411764705882),
     (0.9921568627450981, 0.8156862745098039, 0.6352941176470588),
     (0.6196078431372549, 0.6039215686274509, 0.7843137254901961),
     (0.7764705882352941, 0.8588235294117647, 0.9372549019607843),
     (0.6196078431372549, 0.6039215686274509, 0.7843137254901961),
     (0.6196078431372549, 0.6039215686274509, 0.7843137254901961)]
)
# Foreground colormap: one color per true label for the dataset samples.
cmap_dots = ListedColormap(
    [(0.19215686274509805, 0.5098039215686274, 0.7411764705882353),
     (0.9019607843137255, 0.3333333333333333, 0.050980392156862744),
     (0.19215686274509805, 0.6392156862745098, 0.32941176470588235)]
)

plt.scatter(
    X_test_mesh[:, 0], X_test_mesh[:, 1],
    # Encode each boolean triple (is label 0/1/2 in the set?) as a single
    # integer, so each distinct prediction set gets its own background color.
    c=np.ravel_multi_index(y_pred_set.T, (2, 2, 2)),
    cmap=cmap_back, marker='.', s=10
)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_dots)
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Confidence regions with KNN")
plt.show()

##############################################################################
# On the plot above, the dots represent the samples from our dataset, with their
# color indicating their respective label.
# The blue, orange and green zones correspond to prediction sets containing only
# the blue label, the orange label and the green label, respectively.
# The purple zones represent areas where the prediction sets contain more than
# one label, indicating that the model is uncertain there.
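
##############################################################################
# As a final illustrative addition, we can quantify this uncertainty by
# measuring the fraction of the mesh that falls in a multi-label zone:

# ``y_pred_set`` now refers to the mesh predictions of shape (n_points, 3);
# a row sum greater than one means the prediction set holds several labels.
uncertain_fraction = (y_pred_set.sum(axis=1) > 1).mean()
print(f"Fraction of the plane with an uncertain prediction set: "
      f"{uncertain_fraction:.2%}")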