From 8939713421c7b9913ea1b04b0a362eb79900e798 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 16 Oct 2017 11:36:49 +0200 Subject: [PATCH 1/6] Incorrect multinomial logistic regression predict_proba test added (#9889) --- sklearn/linear_model/tests/test_logistic.py | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index ea4300df01100..eefbaf4bfc1fd 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -565,6 +565,35 @@ def test_ovr_multinomial_iris(): assert_equal(scores.shape, (3, n_cv, 10)) +def test_ovr_multinomial_iris_binary(): + # Test that multinomial gives better predictions on binary outcome iris + # model with respect to log loss and negligible regularisation. + train, target = iris.data, iris.target + n_samples, n_features = train.shape + + # Conflate classes 0 and 1 and train ovr and multinomial models on this + # modified dataset + target_copy = target.copy() + target_copy[target_copy == 0] = 1 + + clf_ovr = LogisticRegression(C=10000, multi_class='ovr', + solver='saga', max_iter=100000) + clf_multi = LogisticRegression(C=10000, multi_class='multinomial', + solver='saga', max_iter=100000) + clf_ovr.fit(train, target_copy) + clf_multi.fit(train, target_copy) + + # Test that for the iris data multinomial gives a better accuracy than OvR + # on the conflated 2 class dataset with respect to log loss + predicted_ovr = clf_ovr.predict_proba(train) + predicted_multi = clf_multi.predict_proba(train) + + ovr_log_loss = log_loss(target_copy, predicted_ovr) + multi_log_loss = log_loss(target_copy, predicted_multi) + + assert_greater(ovr_log_loss, multi_log_loss) + + def test_logistic_regression_solvers(): X, y = make_classification(n_features=10, n_informative=5, random_state=0) From 4ecac9ed67a1574c19f6756ccdb6cd9baf48ada9 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 16 Oct 2017 11:38:50 +0200 Subject: [PATCH 2/6] Fixed incorrect multinomial logistic regression predict_proba (#9889) --- sklearn/linear_model/logistic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 59e6db8457a45..134c1f44d33e4 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1331,9 +1331,13 @@ def predict_proba(self, X): """ if not hasattr(self, "coef_"): raise NotFittedError("Call fit before prediction") - calculate_ovr = self.coef_.shape[0] == 1 or self.multi_class == "ovr" - if calculate_ovr: + if self.multi_class == "ovr": return super(LogisticRegression, self)._predict_proba_lr(X) + elif self.coef_.shape[0] == 1: + # Workaround for multi_class="multinomial" and binary outcomes + # which requires softmax prediction with only a 1D coef_ vector. 
+ decision = self.decision_function(X) + return softmax(np.c_[-decision, decision], copy=False) else: return softmax(self.decision_function(X), copy=False) From ae22f35b1098dece745421dca0809b154f054746 Mon Sep 17 00:00:00 2001 From: Rob Date: Mon, 16 Oct 2017 11:55:08 +0200 Subject: [PATCH 3/6] Updated what's new for multinomial logistic regression predictions (#9889) --- doc/whats_new/v0.20.rst | 5 +++++ sklearn/linear_model/tests/test_logistic.py | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 38bd521412926..4ea1728965b1d 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -85,6 +85,11 @@ Classifiers and regressors identical X values. :issue:`9432` by :user:`Dallas Card ` +- Fixed a bug in :class:`linear_model.LogisticRegression` where when using the + parameter ``multi_class='multinomial'``, the ``predict_proba`` method was + returning incorrect probabilities in the case of binary outcomes. + :issue:`9889`. By user `rwolst`. + Decomposition, manifold learning and clustering - Fix for uninformative error in :class:`decomposition.IncrementalPCA`: diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index eefbaf4bfc1fd..8af93eba16cf6 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -567,7 +567,10 @@ def test_ovr_multinomial_iris(): def test_ovr_multinomial_iris_binary(): # Test that multinomial gives better predictions on binary outcome iris - # model with respect to log loss and negligible regularisation. + # model with respect to log loss and negligible regularisation. Note that + # it should be slightly greater as the parameters of the multinomial + # model are roughly half those of ovr so are slightly less affected by + # regularisation and can make slightly better estimates. train, target = iris.data, iris.target n_samples, n_features = train.shape From 70e46b4bfd2abd78e38342c5fdea03adb0a756ad Mon Sep 17 00:00:00 2001 From: Rob Date: Tue, 17 Oct 2017 12:41:04 +0200 Subject: [PATCH 4/6] Updated doc string for coef_ and intercept_ (#9889) --- sklearn/linear_model/logistic.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 134c1f44d33e4..466117c85b435 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1102,13 +1102,18 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, Coefficient of the features in the decision function. `coef_` is of shape (1, n_features) when the given problem - is binary. + is binary and in the case when `multi_class='multinomial'`, then + `coef_` are the coefficients for outcome 1 (True) and `-coef_` are + the coefficients for outcome 0 (False). intercept_ : array, shape (1,) or (n_classes,) Intercept (a.k.a. bias) added to the decision function. If `fit_intercept` is set to False, the intercept is set to zero. - `intercept_` is of shape(1,) when the problem is binary. + `intercept_` is of shape(1,) when the problem is binary and in the + case when `multi_class='multinomial'`, then `intercept_` is the + intercept term for outcome 1 (True) and `-intercept_` is the intercept + term for outcome 0 (False). n_iter_ : array, shape (n_classes,) or (1, ) Actual number of iterations for all classes. 
If binary or multinomial, From fd59597863ced9b80605204f163c59d4c7ca392c Mon Sep 17 00:00:00 2001 From: Rob Date: Wed, 8 Nov 2017 16:27:07 +0100 Subject: [PATCH 5/6] Fixed :user: formatting in whats new --- doc/whats_new/v0.20.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 4c1a699a6239b..2e24cde966b5e 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -123,7 +123,7 @@ Classifiers and regressors - Fixed a bug in :class:`linear_model.LogisticRegression` where when using the parameter ``multi_class='multinomial'``, the ``predict_proba`` method was returning incorrect probabilities in the case of binary outcomes. - :issue:`9939` by user `rwolst`. + :issue:`9939` by :user: `rwolst`. Decomposition, manifold learning and clustering From fb88a0fcb35f2220fe78c186dd8511993b47543b Mon Sep 17 00:00:00 2001 From: Rob Date: Sat, 6 Jan 2018 11:30:49 +0100 Subject: [PATCH 6/6] Simplified regression test --- doc/whats_new/v0.20.rst | 2 +- sklearn/linear_model/logistic.py | 14 +++--- sklearn/linear_model/tests/test_logistic.py | 49 +++++++-------------- 3 files changed, 26 insertions(+), 39 deletions(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 5a02d22a12795..a2d5e25c6a211 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -183,7 +183,7 @@ Classifiers and regressors - Fixed a bug in :class:`linear_model.LogisticRegression` where when using the parameter ``multi_class='multinomial'``, the ``predict_proba`` method was returning incorrect probabilities in the case of binary outcomes. - :issue:`9939` by :user: `rwolst`. + :issue:`9939` by :user:`Roger Westover `. - Fixed a bug in :class:`linear_model.OrthogonalMatchingPursuit` that was broken when setting ``normalize=False``. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index b8effabe87480..c7fa9dc4d0501 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1338,13 +1338,15 @@ def predict_proba(self, X): raise NotFittedError("Call fit before prediction") if self.multi_class == "ovr": return super(LogisticRegression, self)._predict_proba_lr(X) - elif self.coef_.shape[0] == 1: - # Workaround for multi_class="multinomial" and binary outcomes - # which requires softmax prediction with only a 1D coef_ vector. - decision = self.decision_function(X) - return softmax(np.c_[-decision, decision], copy=False) else: - return softmax(self.decision_function(X), copy=False) + decision = self.decision_function(X) + if decision.ndim == 1: + # Workaround for multi_class="multinomial" and binary outcomes + # which requires softmax prediction with only a 1D decision. + decision_2d = np.c_[-decision, decision] + else: + decision_2d = decision + return softmax(decision_2d, copy=False) def predict_log_proba(self, X): """Log of probability estimates. diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 8af93eba16cf6..46ce635daf830 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -198,6 +198,23 @@ def test_multinomial_binary(): assert_greater(np.mean(pred == target), .9) +def test_multinomial_binary_probabilities(): + # Test multinomial LR gives expected probabilities based on the + # decision function, for a binary problem. 
+ X, y = make_classification() + clf = LogisticRegression(multi_class='multinomial', solver='saga') + clf.fit(X, y) + + decision = clf.decision_function(X) + proba = clf.predict_proba(X) + + expected_proba_class_1 = (np.exp(decision) / + (np.exp(decision) + np.exp(-decision))) + expected_proba = np.c_[1-expected_proba_class_1, expected_proba_class_1] + + assert_almost_equal(proba, expected_proba) + + def test_sparsify(): # Test sparsify and densify members. n_samples, n_features = iris.data.shape @@ -565,38 +582,6 @@ def test_ovr_multinomial_iris(): assert_equal(scores.shape, (3, n_cv, 10)) -def test_ovr_multinomial_iris_binary(): - # Test that multinomial gives better predictions on binary outcome iris - # model with respect to log loss and negligible regularisation. Note that - # it should be slightly greater as the parameters of the multinomial - # model are roughly half those of ovr so are slightly less affected by - # regularisation and can make slightly better estimates. - train, target = iris.data, iris.target - n_samples, n_features = train.shape - - # Conflate classes 0 and 1 and train ovr and multinomial models on this - # modified dataset - target_copy = target.copy() - target_copy[target_copy == 0] = 1 - - clf_ovr = LogisticRegression(C=10000, multi_class='ovr', - solver='saga', max_iter=100000) - clf_multi = LogisticRegression(C=10000, multi_class='multinomial', - solver='saga', max_iter=100000) - clf_ovr.fit(train, target_copy) - clf_multi.fit(train, target_copy) - - # Test that for the iris data multinomial gives a better accuracy than OvR - # on the conflated 2 class dataset with respect to log loss - predicted_ovr = clf_ovr.predict_proba(train) - predicted_multi = clf_multi.predict_proba(train) - - ovr_log_loss = log_loss(target_copy, predicted_ovr) - multi_log_loss = log_loss(target_copy, predicted_multi) - - assert_greater(ovr_log_loss, multi_log_loss) - - def test_logistic_regression_solvers(): X, y = make_classification(n_features=10, n_informative=5, random_state=0)
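
Note for reviewers (not part of the patches): the standalone snippet below is a
minimal sketch of the behaviour the series above is meant to guarantee, and it
assumes a scikit-learn build with these patches applied; the dataset and solver
are arbitrary illustrative choices. For a binary problem fitted with
`multi_class='multinomial'`, `predict_proba` should agree with the softmax of
`[-d, d]`, where `d` is the output of `decision_function` (equivalently,
`1 / (1 + exp(-2 * d))` for the positive class).

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    # Arbitrary binary toy problem, chosen only for illustration.
    X, y = make_classification(random_state=0)
    clf = LogisticRegression(multi_class='multinomial',
                             solver='lbfgs').fit(X, y)

    # For binary y, coef_ has shape (1, n_features), so decision_function
    # returns a 1D array of per-sample scores d.
    d = clf.decision_function(X)

    # Expected probabilities: softmax over the two-column matrix [-d, d],
    # mirroring the softmax(np.c_[-decision, decision]) used in the fix.
    scores = np.c_[-d, d]
    expected = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)

    # With the patches applied this comparison should pass.
    np.testing.assert_allclose(clf.predict_proba(X), expected)

Before this series the same check fails, because `predict_proba` fell back to
the one-vs-rest formula `1 / (1 + exp(-d))`, which is inconsistent with the
multinomial (softmax) loss the coefficients were fitted under.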