From 8939713421c7b9913ea1b04b0a362eb79900e798 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 16 Oct 2017 11:36:49 +0200 Subject: [PATCH 1/6] Incorrect multinomial logistic regression predict_proba test added (#9889) --- sklearn/linear_model/tests/test_logistic.py | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index ea4300df01100..eefbaf4bfc1fd 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -565,6 +565,35 @@ def test_ovr_multinomial_iris(): assert_equal(scores.shape, (3, n_cv, 10)) +def test_ovr_multinomial_iris_binary(): + # Test that multinomial gives better predictions on binary outcome iris + # model with respect to log loss and negligible regularisation. + train, target = iris.data, iris.target + n_samples, n_features = train.shape + + # Conflate classes 0 and 1 and train ovr and multinomial models on this + # modified dataset + target_copy = target.copy() + target_copy[target_copy == 0] = 1 + + clf_ovr = LogisticRegression(C=10000, multi_class='ovr', + solver='saga', max_iter=100000) + clf_multi = LogisticRegression(C=10000, multi_class='multinomial', + solver='saga', max_iter=100000) + clf_ovr.fit(train, target_copy) + clf_multi.fit(train, target_copy) + + # Test that for the iris data multinomial gives a better accuracy than OvR + # on the conflated 2 class dataset with respect to log loss + predicted_ovr = clf_ovr.predict_proba(train) + predicted_multi = clf_multi.predict_proba(train) + + ovr_log_loss = log_loss(target_copy, predicted_ovr) + multi_log_loss = log_loss(target_copy, predicted_multi) + + assert_greater(ovr_log_loss, multi_log_loss) + + def test_logistic_regression_solvers(): X, y = make_classification(n_features=10, n_informative=5, random_state=0) From 4ecac9ed67a1574c19f6756ccdb6cd9baf48ada9 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 16 Oct 2017 11:38:50 +0200 Subject: [PATCH 2/6] Fixed incorrect multinomial logistic regression predict_proba (#9889) --- sklearn/linear_model/logistic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 59e6db8457a45..134c1f44d33e4 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1331,9 +1331,13 @@ def predict_proba(self, X): """ if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - calculate_ovr = self.coef_.shape[0] == 1 or self.multi_class == "ovr" - if calculate_ovr: + if self.multi_class == "ovr": return super(LogisticRegression, self)._predict_proba_lr(X) + elif self.coef_.shape[0] == 1: + # Workaround for multi_class="multinomial" and binary outcomes + # which requires softmax prediction with only a 1D coef_ vector. 
+ decision = self.decision_function(X) + return softmax(np.c_[-decision, decision], copy=False) else: return softmax(self.decision_function(X), copy=False) From ae22f35b1098dece745421dca0809b154f054746 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 16 Oct 2017 11:55:08 +0200 Subject: [PATCH 3/6] Updated what's new for multinomial logistic regression predictions (#9889) --- doc/whats_new/v0.20.rst | 5 +++++ sklearn/linear_model/tests/test_logistic.py | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 38bd521412926..4ea1728965b1d 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -85,6 +85,11 @@ Classifiers and regressors identical X values. :issue:`9432` by :user:`Dallas Card ` +- Fixed a bug in :class:`linear_model.LogisticRegression` where when using the + parameter ``multi_class='multinomial'``, the ``predict_proba`` method was + returning incorrect probabilities in the case of binary outcomes. + :issue:`9889`. By user `rwolst`. + Decomposition, manifold learning and clustering - Fix for uninformative error in :class:`decomposition.IncrementalPCA`: diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index eefbaf4bfc1fd..8af93eba16cf6 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -567,7 +567,10 @@ def test_ovr_multinomial_iris(): def test_ovr_multinomial_iris_binary(): # Test that multinomial gives better predictions on binary outcome iris - # model with respect to log loss and negligible regularisation. + # model with respect to log loss and negligible regularisation. Note that + # it should be slightly greater as the parameters of the multinomial + # model are roughly half those of ovr so are slightly less affected by + # regularisation and can make slightly better estimates. train, target = iris.data, iris.target n_samples, n_features = train.shape From 70e46b4bfd2abd78e38342c5fdea03adb0a756ad Mon Sep 17 00:00:00 2001 From: Rob Date: Tue, 17 Oct 2017 12:41:04 +0200 Subject: [PATCH 4/6] Updated doc string for coef_ and intercept_ (#9889) --- sklearn/linear_model/logistic.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 134c1f44d33e4..466117c85b435 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1102,13 +1102,18 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, Coefficient of the features in the decision function. `coef_` is of shape (1, n_features) when the given problem - is binary. + is binary and in the case when `multi_class='multinomial'`, then + `coef_` are the coefficients for outcome 1 (True) and `-coef_` are + the coefficients for outcome 0 (False). intercept_ : array, shape (1,) or (n_classes,) Intercept (a.k.a. bias) added to the decision function. If `fit_intercept` is set to False, the intercept is set to zero. - `intercept_` is of shape(1,) when the problem is binary. + `intercept_` is of shape(1,) when the problem is binary and in the + case when `multi_class='multinomial'`, then `intercept_` is the + intercept term for outcome 1 (True) and `-intercept_` is the intercept + term for outcome 0 (False). n_iter_ : array, shape (n_classes,) or (1, ) Actual number of iterations for all classes. 
If binary or multinomial, From fd59597863ced9b80605204f163c59d4c7ca392c Mon Sep 17 00:00:00 2001 From: Rob Date: Wed, 8 Nov 2017 16:27:07 +0100 Subject: [PATCH 5/6] Fixed :user: formatting in whats new --- doc/whats_new/v0.20.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 4c1a699a6239b..2e24cde966b5e 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -123,7 +123,7 @@ Classifiers and regressors - Fixed a bug in :class:`linear_model.LogisticRegression` where when using the parameter ``multi_class='multinomial'``, the ``predict_proba`` method was returning incorrect probabilities in the case of binary outcomes. - :issue:`9939` by user `rwolst`. + :issue:`9939` by :user: `rwolst`. Decomposition, manifold learning and clustering From fb88a0fcb35f2220fe78c186dd8511993b47543b Mon Sep 17 00:00:00 2001 From: Rob Date: Sat, 6 Jan 2018 11:30:49 +0100 Subject: [PATCH 6/6] Simplified regression test --- doc/whats_new/v0.20.rst | 2 +- sklearn/linear_model/logistic.py | 14 +++--- sklearn/linear_model/tests/test_logistic.py | 49 +++++++-------------- 3 files changed, 26 insertions(+), 39 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 5a02d22a12795..a2d5e25c6a211 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -183,7 +183,7 @@ Classifiers and regressors - Fixed a bug in :class:`linear_model.LogisticRegression` where when using the parameter ``multi_class='multinomial'``, the ``predict_proba`` method was returning incorrect probabilities in the case of binary outcomes. - :issue:`9939` by :user: `rwolst`. + :issue:`9939` by :user:`Roger Westover `. - Fixed a bug in :class:`linear_model.OrthogonalMatchingPursuit` that was broken when setting ``normalize=False``. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index b8effabe87480..c7fa9dc4d0501 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1338,13 +1338,15 @@ def predict_proba(self, X): raise NotFittedError("Call fit before prediction") if self.multi_class == "ovr": return super(LogisticRegression, self)._predict_proba_lr(X) - elif self.coef_.shape[0] == 1: - # Workaround for multi_class="multinomial" and binary outcomes - # which requires softmax prediction with only a 1D coef_ vector. - decision = self.decision_function(X) - return softmax(np.c_[-decision, decision], copy=False) else: - return softmax(self.decision_function(X), copy=False) + decision = self.decision_function(X) + if decision.ndim == 1: + # Workaround for multi_class="multinomial" and binary outcomes + # which requires softmax prediction with only a 1D decision. + decision_2d = np.c_[-decision, decision] + else: + decision_2d = decision + return softmax(decision_2d, copy=False) def predict_log_proba(self, X): """Log of probability estimates. diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 8af93eba16cf6..46ce635daf830 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -198,6 +198,23 @@ def test_multinomial_binary(): assert_greater(np.mean(pred == target), .9) +def test_multinomial_binary_probabilities(): + # Test multinomial LR gives expected probabilities based on the + # decision function, for a binary problem. 
+ X, y = make_classification() + clf = LogisticRegression(multi_class='multinomial', solver='saga') + clf.fit(X, y) + + decision = clf.decision_function(X) + proba = clf.predict_proba(X) + + expected_proba_class_1 = (np.exp(decision) / + (np.exp(decision) + np.exp(-decision))) + expected_proba = np.c_[1-expected_proba_class_1, expected_proba_class_1] + + assert_almost_equal(proba, expected_proba) + + def test_sparsify(): # Test sparsify and densify members. n_samples, n_features = iris.data.shape @@ -565,38 +582,6 @@ def test_ovr_multinomial_iris(): assert_equal(scores.shape, (3, n_cv, 10)) -def test_ovr_multinomial_iris_binary(): - # Test that multinomial gives better predictions on binary outcome iris - # model with respect to log loss and negligible regularisation. Note that - # it should be slightly greater as the parameters of the multinomial - # model are roughly half those of ovr so are slightly less affected by - # regularisation and can make slightly better estimates. - train, target = iris.data, iris.target - n_samples, n_features = train.shape - - # Conflate classes 0 and 1 and train ovr and multinomial models on this - # modified dataset - target_copy = target.copy() - target_copy[target_copy == 0] = 1 - - clf_ovr = LogisticRegression(C=10000, multi_class='ovr', - solver='saga', max_iter=100000) - clf_multi = LogisticRegression(C=10000, multi_class='multinomial', - solver='saga', max_iter=100000) - clf_ovr.fit(train, target_copy) - clf_multi.fit(train, target_copy) - - # Test that for the iris data multinomial gives a better accuracy than OvR - # on the conflated 2 class dataset with respect to log loss - predicted_ovr = clf_ovr.predict_proba(train) - predicted_multi = clf_multi.predict_proba(train) - - ovr_log_loss = log_loss(target_copy, predicted_ovr) - multi_log_loss = log_loss(target_copy, predicted_multi) - - assert_greater(ovr_log_loss, multi_log_loss) - - def test_logistic_regression_solvers(): X, y = make_classification(n_features=10, n_informative=5, random_state=0)
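
Note for reviewers (not part of the patches): the standalone snippet below is a
minimal sketch of the behaviour the series above is meant to guarantee, and it
assumes a scikit-learn build with these patches applied; the dataset and solver
are arbitrary illustrative choices. For a binary problem fitted with
`multi_class='multinomial'`, `predict_proba` should agree with the softmax of
`[-d, d]`, where `d` is the output of `decision_function` (equivalently,
`1 / (1 + exp(-2 * d))` for the positive class).

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    # Arbitrary binary toy problem, chosen only for illustration.
    X, y = make_classification(random_state=0)
    clf = LogisticRegression(multi_class='multinomial',
                             solver='lbfgs').fit(X, y)

    # For binary y, coef_ has shape (1, n_features), so decision_function
    # returns a 1D array of per-sample scores d.
    d = clf.decision_function(X)

    # Expected probabilities: softmax over the two-column matrix [-d, d],
    # mirroring the softmax(np.c_[-decision, decision]) used in the fix.
    scores = np.c_[-d, d]
    expected = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)

    # With the patches applied this comparison should pass.
    np.testing.assert_allclose(clf.predict_proba(X), expected)

Before this series the same check fails, because `predict_proba` fell back to
the one-vs-rest formula `1 / (1 + exp(-d))`, which is inconsistent with the
multinomial (softmax) loss the coefficients were fitted under.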