
Commit 217fe94

FIX LogisticRegression warm start with newton-cholesky solver (#31866)

1 parent ba3e753 · commit 217fe94

3 files changed: +56 −5 lines
Changelog entry (new file)

Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+- Fixed a bug in :class:`linear_model.LogisticRegression` when used with
+  `solver="newton-cholesky"` and `warm_start=True` on multi-class problems, either
+  with `fit_intercept=True` or with `penalty=None` (both resulting in unpenalized
+  parameters for the solver). The coefficients and intercepts of the last class, as
+  provided by the warm start, were partially and wrongly overwritten by zeros.
+  By :user:`Christian Lorentzen <lorentzenchr>`
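
For context, the affected configurations can be reproduced with a short sketch (illustrative only, not part of the commit; any multi-class dataset works):

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)  # 3 classes

    # Case 1: penalized coefficients but unpenalized intercepts.
    clf = LogisticRegression(solver="newton-cholesky", warm_start=True, fit_intercept=True)
    # Case 2: fully unpenalized parameters.
    # clf = LogisticRegression(solver="newton-cholesky", warm_start=True, penalty=None)

    clf.fit(X, y)
    clf.fit(X, y)  # before this fix, parts of the last class's warm-started
                   # coefficients and intercepts were overwritten by zeros here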

sklearn/linear_model/_glm/_newton_solver.py

Lines changed: 15 additions & 2 deletions

@@ -469,6 +469,19 @@ def setup(self, X, y, sample_weight):
         self.is_multinomial_no_penalty = (
             self.linear_loss.base_loss.is_multiclass and self.l2_reg_strength == 0
         )
+        if self.is_multinomial_no_penalty:
+            # See inner_solve. The provided coef might not adhere to the convention
+            # that the last class is set to zero.
+            # This relies on the usual freedom of an (overparametrized) multinomial
+            # model to add a constant to all classes, which doesn't change predictions.
+            n_classes = self.linear_loss.base_loss.n_classes
+            coef = self.coef.reshape(n_classes, -1, order="F")  # easier as 2d
+            coef -= coef[-1, :]  # coef -= coef of last class
+        elif self.is_multinomial_with_intercept:
+            # See inner_solve. Same as above, but only for the intercept.
+            n_classes = self.linear_loss.base_loss.n_classes
+            # intercept -= intercept of last class
+            self.coef[-n_classes:] -= self.coef[-1]

     def update_gradient_hessian(self, X, y, sample_weight):
         _, _, self.hessian_warning = self.linear_loss.gradient_hessian(
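
The comments in this hunk rely on softmax shift invariance: subtracting the last class's parameter vector from every class shifts all logits of a sample by the same constant, so predicted probabilities are unchanged while the last class's parameters become exactly zero. A minimal sketch of that fact (illustrative only; shapes and data are made up):

    import numpy as np
    from scipy.special import softmax

    rng = np.random.default_rng(0)
    n_classes, n_dof = 3, 4
    coef = rng.normal(size=(n_classes, n_dof))
    X = rng.normal(size=(10, n_dof))

    shifted = coef - coef[-1, :]  # convention: last class all zeros
    p_before = softmax(X @ coef.T, axis=1)
    p_after = softmax(X @ shifted.T, axis=1)
    np.testing.assert_allclose(p_before, p_after)  # identical predictions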
@@ -518,10 +531,10 @@ def inner_solve(self, X, y, sample_weight):
         #
         # We choose the standard approach and set all the coefficients of the last
         # class to zero, for all features including the intercept.
+        # Note that coef was already dealt with in setup.
         n_classes = self.linear_loss.base_loss.n_classes
         n_dof = self.coef.size // n_classes  # degrees of freedom per class
         n = self.coef.size - n_dof  # effective size
-        self.coef[n_classes - 1 :: n_classes] = 0
         self.gradient[n_classes - 1 :: n_classes] = 0
         self.hessian[n_classes - 1 :: n_classes, :] = 0
         self.hessian[:, n_classes - 1 :: n_classes] = 0

@@ -544,7 +557,7 @@
         elif self.is_multinomial_with_intercept:
             # Here, only intercepts are unpenalized. We again choose the last class and
             # set its intercept to zero.
-            self.coef[-1] = 0
+            # Note that coef was already dealt with in setup.
             self.gradient[-1] = 0
             self.hessian[-1, :] = 0
             self.hessian[:, -1] = 0
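
The strided indexing in these hunks follows from the flat, Fortran-ordered coef layout: entry (class k, dof j) sits at flat index k + j * n_classes, so a stride of n_classes walks one class across all degrees of freedom, and with an intercept the trailing n_classes entries are the per-class intercepts. A small sketch of that layout (shapes are assumptions):

    import numpy as np

    n_classes, n_dof = 3, 4  # e.g. 3 features plus an intercept
    coef2d = np.arange(n_classes * n_dof, dtype=float).reshape(n_classes, n_dof)
    flat = coef2d.ravel(order="F")  # same layout as reshape(..., order="F") in setup

    # The stride-n_classes slice selects exactly the last class's row ...
    np.testing.assert_array_equal(flat[n_classes - 1 :: n_classes], coef2d[-1, :])
    # ... and the trailing n_classes entries are the per-class intercepts.
    np.testing.assert_array_equal(flat[-n_classes:], coef2d[:, -1])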

sklearn/linear_model/tests/test_logistic.py

Lines changed: 35 additions & 3 deletions

@@ -1434,9 +1434,7 @@ def test_n_iter(solver):
     assert clf_cv.n_iter_.shape == (1, n_cv_fold, n_Cs)


-@pytest.mark.parametrize(
-    "solver", sorted(set(SOLVERS) - set(["liblinear", "newton-cholesky"]))
-)
+@pytest.mark.parametrize("solver", sorted(set(SOLVERS) - set(["liblinear"])))
 @pytest.mark.parametrize("warm_start", (True, False))
 @pytest.mark.parametrize("fit_intercept", (True, False))
 def test_warm_start(global_random_seed, solver, warm_start, fit_intercept):

@@ -1469,6 +1467,40 @@ def test_warm_start(global_random_seed, solver, warm_start, fit_intercept):
     assert cum_diff > 2.0, msg


+@pytest.mark.parametrize("solver", ["newton-cholesky", "newton-cg"])
+@pytest.mark.parametrize("fit_intercept", (True, False))
+@pytest.mark.parametrize("penalty", ("l2", None))
+def test_warm_start_newton_solver(global_random_seed, solver, fit_intercept, penalty):
+    """Test that 2 steps at once are the same as 2 single steps with warm start."""
+    X, y = iris.data, iris.target
+
+    clf1 = LogisticRegression(
+        solver=solver,
+        max_iter=2,
+        fit_intercept=fit_intercept,
+        penalty=penalty,
+        random_state=global_random_seed,
+    )
+    with ignore_warnings(category=ConvergenceWarning):
+        clf1.fit(X, y)
+
+    clf2 = LogisticRegression(
+        solver=solver,
+        max_iter=1,
+        warm_start=True,
+        fit_intercept=fit_intercept,
+        penalty=penalty,
+        random_state=global_random_seed,
+    )
+    with ignore_warnings(category=ConvergenceWarning):
+        clf2.fit(X, y)
+        clf2.fit(X, y)
+
+    assert_allclose(clf2.coef_, clf1.coef_)
+    if fit_intercept:
+        assert_allclose(clf2.intercept_, clf1.intercept_)
+
+
 @pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
 def test_saga_vs_liblinear(global_random_seed, csr_container):
     iris = load_iris()
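
The invariant encoded by the new test can also be checked standalone: two Newton steps taken at once must match two warm-started single steps. A sketch mirroring the test (assumes a recent scikit-learn where newton-cholesky supports multinomial problems):

    import warnings

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.exceptions import ConvergenceWarning
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)

    with warnings.catch_warnings():
        # max_iter is deliberately tiny, so silence the convergence warnings
        warnings.simplefilter("ignore", ConvergenceWarning)
        two_at_once = LogisticRegression(solver="newton-cholesky", max_iter=2).fit(X, y)
        step_by_step = LogisticRegression(solver="newton-cholesky", max_iter=1, warm_start=True)
        step_by_step.fit(X, y)
        step_by_step.fit(X, y)

    np.testing.assert_allclose(step_by_step.coef_, two_at_once.coef_)
    np.testing.assert_allclose(step_by_step.intercept_, two_at_once.intercept_)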
