Skip to content

Commit 8aebfe5

Browse files
TomDLT authored and glemaitre committed
FIX improve error message for large sparse matrix input in LogisticRegression (#21093)
1 parent c5a00ea commit 8aebfe5

File tree

3 files changed

+98
-3
lines changed

3 files changed

+98
-3
lines changed

doc/whats_new/v1.1.rst

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
.. include:: _contributors.rst
2+
3+
.. currentmodule:: sklearn
4+
5+
.. _changes_1_1:
6+
7+
Version 1.1.0
8+
=============
9+
10+
**In Development**
11+
12+
13+
.. include:: changelog_legend.inc
14+
15+
Minimal dependencies
16+
--------------------
17+
18+
Version 1.1.0 of scikit-learn requires Python 3.7+, numpy 1.14.6+ and
19+
scipy 1.1.0+. Optional minimal dependency is matplotlib 2.2.2+.
20+
21+
Put the changes in their relevant module.
22+
23+
Changed models
24+
--------------
25+
26+
27+
Changelog
28+
---------
29+
30+
..
31+
Entries should be grouped by module (in alphabetic order) and prefixed with
32+
one of the labels: |MajorFeature|, |Feature|, |Efficiency|, |Enhancement|,
33+
|Fix| or |API| (see whats_new.rst for descriptions).
34+
Entries should be ordered by those labels (e.g. |Fix| after |Efficiency|).
35+
Changes not specific to a module should be listed under *Multiple Modules*
36+
or *Miscellaneous*.
37+
Entries should end with:
38+
:pr:`123456` by :user:`Joe Bloggs <joeongithub>`.
39+
where 123456 is the *pull request* number, not the issue number.
40+
41+
:mod:`sklearn.linear_model`
42+
...........................
43+
44+
- |Fix| :class:`linear_model.LogisticRegression` now raises a better error
45+
message when the solver does not support sparse matrices with int64 indices.
46+
:pr:`21093` by `Tom Dupre la Tour`_.
47+
48+
:mod:`sklearn.utils`
49+
....................
50+
51+
- |Enhancement| :func:`utils.validation._check_sample_weight` can perform a
52+
non-negativity check on the sample weights. It can be turned on
53+
using the ``only_non_negative`` bool parameter.
54+
Estimators that check for non-negative weights are updated:
55+
:class:`linear_model.LinearRegression` (here the previous
56+
error message was misleading),
57+
:class:`ensemble.AdaBoostClassifier`,
58+
:class:`ensemble.AdaBoostRegressor`,
59+
:class:`neighbors.KernelDensity`.
60+
:pr:`20880` by :user:`Guillaume Lemaitre <glemaitre>`
61+
and :user:`András Simon <simonandras>`.
62+
63+
:mod:`sklearn.pipeline`
64+
.......................
65+
66+
- |Enhancement| Added support for "passthrough" in :class:`pipeline.FeatureUnion`.
67+
Setting a transformer to "passthrough" will pass the features unchanged.
68+
:pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.
69+
70+
Code and Documentation Contributors
71+
-----------------------------------
72+
73+
Thanks to everyone who has contributed to the maintenance and improvement of
74+
the project since version 1.0, including:
75+
76+
TODO: update at the time of the release.

sklearn/linear_model/_logistic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ def _logistic_regression_path(
663663
X,
664664
accept_sparse="csr",
665665
dtype=np.float64,
666-
accept_large_sparse=solver != "liblinear",
666+
accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
667667
)
668668
y = check_array(y, ensure_2d=False, dtype=None)
669669
check_consistent_length(X, y)
@@ -1511,7 +1511,7 @@ def fit(self, X, y, sample_weight=None):
15111511
accept_sparse="csr",
15121512
dtype=_dtype,
15131513
order="C",
1514-
accept_large_sparse=solver != "liblinear",
1514+
accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
15151515
)
15161516
check_classification_targets(y)
15171517
self.classes_ = np.unique(y)
@@ -2080,7 +2080,7 @@ def fit(self, X, y, sample_weight=None):
20802080
accept_sparse="csr",
20812081
dtype=np.float64,
20822082
order="C",
2083-
accept_large_sparse=solver != "liblinear",
2083+
accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
20842084
)
20852085
check_classification_targets(y)
20862086

sklearn/linear_model/tests/test_logistic.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2237,3 +2237,22 @@ def test_sample_weight_not_modified(multi_class, class_weight):
22372237
)
22382238
clf.fit(X, y, sample_weight=W)
22392239
assert_allclose(expected, W)
2240+
2241+
2242+
@pytest.mark.parametrize("solver", ["liblinear", "lbfgs", "newton-cg", "sag", "saga"])
2243+
def test_large_sparse_matrix(solver):
2244+
# Solvers either accept large sparse matrices, or raise helpful error.
2245+
# Non-regression test for pull-request #21093.
2246+
2247+
# generate sparse matrix with int64 indices
2248+
X = sp.rand(20, 10, format="csr")
2249+
for attr in ["indices", "indptr"]:
2250+
setattr(X, attr, getattr(X, attr).astype("int64"))
2251+
y = np.random.randint(2, size=X.shape[0])
2252+
2253+
if solver in ["liblinear", "sag", "saga"]:
2254+
msg = "Only sparse matrices with 32-bit integer indices"
2255+
with pytest.raises(ValueError, match=msg):
2256+
LogisticRegression(solver=solver).fit(X, y)
2257+
else:
2258+
LogisticRegression(solver=solver).fit(X, y)

0 commit comments

Comments
 (0)