scikit-learn · jeremiedbb · May 3, 2024 · Mar 26, 2024 · Apr 7, 2024 · Apr 7, 2024
diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py
@@ -52,7 +52,6 @@ def make_estimator(self, params):
         estimator = LogisticRegression(
             solver=solver,
             penalty=penalty,
-            multi_class="multinomial",
             tol=0.01,
             n_jobs=n_jobs,
             random_state=0,

diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py
@@ -20,6 +20,7 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
+from sklearn.multiclass import OneVsRestClassifier
 from sklearn.preprocessing import LabelBinarizer, LabelEncoder
 from sklearn.utils.extmath import safe_sparse_dot, softmax
 from sklearn.utils.parallel import Parallel, delayed
@@ -95,14 +96,15 @@ def fit_single(
         else:
             lr = LogisticRegression(
                 solver=solver,
-                multi_class=multi_class,
                 C=C,
                 penalty=penalty,
                 fit_intercept=False,
                 tol=0,
                 max_iter=this_max_iter,
                 random_state=42,
             )
+            if multi_class == "ovr":
+                lr = OneVsRestClassifier(lr)
 
         # Makes cpu cache even for all fit calls
         X_train.max()
@@ -118,8 +120,12 @@ def fit_single(
             except NotImplementedError:
                 # Lightning predict_proba is not implemented for n_classes > 2
                 y_pred = _predict_proba(lr, X)
+            if isinstance(lr, OneVsRestClassifier):
+                coef = np.concatenate([est.coef_ for est in lr.estimators_])
+            else:
+                coef = lr.coef_
             score = log_loss(y, y_pred, normalize=False) / n_samples
-            score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum(np.abs(lr.coef_))
+            score += 0.5 * alpha * np.sum(coef**2) + beta * np.sum(np.abs(coef))
             scores.append(score)
         train_score, test_score = tuple(scores)
 
@@ -133,6 +139,7 @@ def fit_single(
 
 
 def _predict_proba(lr, X):
+    """Predict proba for lightning for n_classes >=3."""
     pred = safe_sparse_dot(X, lr.coef_.T)
     if hasattr(lr, "intercept_"):
         pred += lr.intercept_

diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst
@@ -353,7 +353,7 @@ The parameter `deep` will control whether or not the parameters of the
     subestimator__intercept_scaling -> 1
     subestimator__l1_ratio -> None
     subestimator__max_iter -> 100
-    subestimator__multi_class -> auto
+    subestimator__multi_class -> deprecated
     subestimator__n_jobs -> None
     subestimator__penalty -> l2
     subestimator__random_state -> None

diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
@@ -1047,24 +1047,24 @@ Solvers
 The solvers implemented in the class :class:`LogisticRegression`
 are "lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag" and "saga":
 
-The following table summarizes the penalties supported by each solver:
+The following table summarizes the penalties and multinomial multiclass supported by each solver:
 
 +------------------------------+-----------------+-------------+-----------------+-----------------------+-----------+------------+
 |                              |                       **Solvers**                                                                |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
 | **Penalties**                | **'lbfgs'** | **'liblinear'** | **'newton-cg'** | **'newton-cholesky'** | **'sag'** | **'saga'** |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
-| Multinomial + L2 penalty     |     yes     |       no        |       yes       |     no                |    yes    |    yes     |
+| L2 penalty                   |     yes     |       no        |       yes       |     no                |    yes    |    yes     |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
-| OVR + L2 penalty             |     yes     |       yes       |       yes       |     yes               |    yes    |    yes     |
+| L1 penalty                   |     no      |       yes       |       no        |     no                |    no     |    yes     |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
-| Multinomial + L1 penalty     |     no      |       no        |       no        |     no                |    no     |    yes     |
+| Elastic-Net (L1 + L2)        |     no      |       no        |       no        |     no                |    no     |    yes     |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
-| OVR + L1 penalty             |     no      |       yes       |       no        |     no                |    no     |    yes     |
+| No penalty ('none')          |     yes     |       no        |       yes       |     yes               |    yes    |    yes     |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
-| Elastic-Net                  |     no      |       no        |       no        |     no                |    no     |    yes     |
+| **Multiclass support**       |                                                                                                  |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
-| No penalty ('none')          |     yes     |       no        |       yes       |     yes               |    yes    |    yes     |
+| multinomial multiclass       |     yes     |       no        |       yes       |     no                |    yes    |    yes     |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
 | **Behaviors**                |                                                                                                  |
 +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+

diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst
@@ -63,8 +63,8 @@ can provide additional strategies beyond what is built-in:
   - :class:`semi_supervised.LabelSpreading`
   - :class:`discriminant_analysis.LinearDiscriminantAnalysis`
   - :class:`svm.LinearSVC` (setting multi_class="crammer_singer")
-  - :class:`linear_model.LogisticRegression` (setting multi_class="multinomial")
-  - :class:`linear_model.LogisticRegressionCV` (setting multi_class="multinomial")
+  - :class:`linear_model.LogisticRegression` (with most solvers)
+  - :class:`linear_model.LogisticRegressionCV` (with most solvers)
   - :class:`neural_network.MLPClassifier`
   - :class:`neighbors.NearestCentroid`
   - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`
@@ -86,8 +86,8 @@ can provide additional strategies beyond what is built-in:
   - :class:`ensemble.GradientBoostingClassifier`
   - :class:`gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_rest")
   - :class:`svm.LinearSVC` (setting multi_class="ovr")
-  - :class:`linear_model.LogisticRegression` (setting multi_class="ovr")
-  - :class:`linear_model.LogisticRegressionCV` (setting multi_class="ovr")
+  - :class:`linear_model.LogisticRegression` (most solvers)
+  - :class:`linear_model.LogisticRegressionCV` (most solvers)
   - :class:`linear_model.SGDClassifier`
   - :class:`linear_model.Perceptron`
   - :class:`linear_model.PassiveAggressiveClassifier`

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
@@ -357,6 +357,14 @@ Changelog
   :class:`linear_model.SGDOneClassSVM`. Pass `average=False` instead.
   :pr:`28582` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
 
+- |API| Parameter `multi_class` was deprecated in
+  :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.LogisticRegressionCV`. `multi_class` will be removed in 1.7,
+  and internally, for 3 and more classes, it will always use multinomial.
+  If you still want to use the one-vs-rest scheme, you can use
+  `OneVsRestClassifier(LogisticRegression(..))`.
+  :pr:`28703` by :user:`Christian Lorentzen <lorentzenchr>`.
+
 - |API| `store_cv_values` and `cv_values_` are deprecated in favor of
   `store_cv_results` and `cv_results_` in `RidgeCV` and `RidgeClassifierCV`.
   :pr:`28915` by :user:`Lucy Liu <lucyleeow>`.

diff --git a/examples/classification/plot_classification_probability.py b/examples/classification/plot_classification_probability.py
@@ -5,8 +5,8 @@
 
 Plot the classification probability for different classifiers. We use a 3 class
 dataset, and we classify it with a Support Vector classifier, L1 and L2
-penalized logistic regression with either a One-Vs-Rest or multinomial setting,
-and Gaussian process classification.
+penalized logistic regression (multinomial multiclass), a One-Vs-Rest version with
+logistic regression, and Gaussian process classification.
 
 Linear SVC is not a probabilistic classifier by default but it has a built-in
 calibration option enabled in this example (`probability=True`).
@@ -30,6 +30,7 @@
 from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score
+from sklearn.multiclass import OneVsRestClassifier
 from sklearn.svm import SVC
 
 iris = datasets.load_iris()
@@ -43,14 +44,12 @@
 
 # Create different classifiers.
 classifiers = {
-    "L1 logistic": LogisticRegression(
-        C=C, penalty="l1", solver="saga", multi_class="multinomial", max_iter=10000
-    ),
+    "L1 logistic": LogisticRegression(C=C, penalty="l1", solver="saga", max_iter=10000),
     "L2 logistic (Multinomial)": LogisticRegression(
-        C=C, penalty="l2", solver="saga", multi_class="multinomial", max_iter=10000
+        C=C, penalty="l2", solver="saga", max_iter=10000
     ),
-    "L2 logistic (OvR)": LogisticRegression(
-        C=C, penalty="l2", solver="saga", multi_class="ovr", max_iter=10000
+    "L2 logistic (OvR)": OneVsRestClassifier(
+        LogisticRegression(C=C, penalty="l2", solver="saga", max_iter=10000)
     ),
     "Linear SVC": SVC(kernel="linear", C=C, probability=True, random_state=0),
     "GPC": GaussianProcessClassifier(kernel),

diff --git a/examples/linear_model/plot_logistic_multinomial.py b/examples/linear_model/plot_logistic_multinomial.py
@@ -18,6 +18,7 @@
 from sklearn.datasets import make_blobs
 from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.linear_model import LogisticRegression
+from sklearn.multiclass import OneVsRestClassifier
 
 # make 3-class dataset for classification
 centers = [[-5, 0], [0, 1.5], [5, -1]]
@@ -26,9 +27,10 @@
 X = np.dot(X, transformation)
 
 for multi_class in ("multinomial", "ovr"):
-    clf = LogisticRegression(
-        solver="sag", max_iter=100, random_state=42, multi_class=multi_class
-    ).fit(X, y)
+    clf = LogisticRegression(solver="sag", max_iter=100, random_state=42)
+    if multi_class == "ovr":
+        clf = OneVsRestClassifier(clf)
+    clf.fit(X, y)
 
     # print the training scores
     print("training score : %.3f (%s)" % (clf.score(X, y), multi_class))
@@ -51,8 +53,12 @@
     # Plot the three one-against-all classifiers
     xmin, xmax = plt.xlim()
     ymin, ymax = plt.ylim()
-    coef = clf.coef_
-    intercept = clf.intercept_
+    if multi_class == "ovr":
+        coef = np.concatenate([est.coef_ for est in clf.estimators_])
+        intercept = np.concatenate([est.intercept_ for est in clf.estimators_])
+    else:
+        coef = clf.coef_
+        intercept = clf.intercept_
 
     def plot_hyperplane(c, color):
         def line(x0):

diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
@@ -32,6 +32,7 @@
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from sklearn.multiclass import OneVsRestClassifier
 
 warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
 t0 = timeit.default_timer()
@@ -76,20 +77,25 @@
             "[model=%s, solver=%s] Number of epochs: %s"
             % (model_params["name"], solver, this_max_iter)
         )
-        lr = LogisticRegression(
+        clf = LogisticRegression(
             solver=solver,
-            multi_class=model,
             penalty="l1",
             max_iter=this_max_iter,
             random_state=42,
         )
+        if model == "ovr":
+            clf = OneVsRestClassifier(clf)
         t1 = timeit.default_timer()
-        lr.fit(X_train, y_train)
+        clf.fit(X_train, y_train)
         train_time = timeit.default_timer() - t1
 
-        y_pred = lr.predict(X_test)
+        y_pred = clf.predict(X_test)
         accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
-        density = np.mean(lr.coef_ != 0, axis=1) * 100
+        if model == "ovr":
+            coef = np.concatenate([est.coef_ for est in clf.estimators_])
+        else:
+            coef = clf.coef_
+        density = np.mean(coef != 0, axis=1) * 100
         accuracies.append(accuracy)
         densities.append(density)
         times.append(train_time)

diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
@@ -287,7 +287,7 @@ class VotingClassifier(ClassifierMixin, _BaseVoting):
     >>> from sklearn.linear_model import LogisticRegression
     >>> from sklearn.naive_bayes import GaussianNB
     >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier
-    >>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
+    >>> clf1 = LogisticRegression(random_state=1)
     >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
     >>> clf3 = GaussianNB()
     >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])