[MRG] FIX+TST regression in multinomial logistic reg when class_weight is auto #5420

Closed · wants to merge 2 commits

9 changes: 5 additions & 4 deletions sklearn/linear_model/logistic.py
@@ -632,10 +632,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                      order='F')
        mask_classes = classes

-    if class_weight == "auto":
+    if class_weight in ["auto", "balanced"]:
+        y_ = y_bin if multi_class == 'ovr' else y
        class_weight_ = compute_class_weight(class_weight, mask_classes,
-                                             y_bin)
-        sample_weight *= class_weight_[le.fit_transform(y_bin)]
+                                             y_)
+        sample_weight *= class_weight_[le.fit_transform(y_)]

    if coef is not None:
        # it must work both giving the bias term and not
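
For context on this hunk: in the multinomial case, the labels passed to compute_class_weight must be the original multiclass y rather than the one-vs-rest binarized y_bin, because y_bin only distinguishes one class from the rest and so cannot yield per-class weights. Below is a minimal sketch of that difference (not part of the patch; it uses the 'balanced' heuristic, the successor of 'auto', and the keyword-only signature of current compute_class_weight):

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Three imbalanced classes: 0 is common, 2 is rare.
y = np.array([0] * 6 + [1] * 3 + [2] * 1)
classes = np.unique(y)

# Weights computed from the original multiclass labels reflect each class frequency.
print(compute_class_weight("balanced", classes=classes, y=y))
# approximately [0.56, 1.11, 3.33]

# A one-vs-rest binarization only distinguishes "class 0" from "the rest",
# so weights derived from it cannot recover the per-class balance above.
y_bin = np.where(y == 0, 1., -1.)
print(np.unique(y_bin))  # [-1.  1.]
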
@@ -1125,7 +1126,7 @@ def fit(self, X, y, sample_weight=None):
            raise ValueError("Tolerance for stopping criteria must be "
                             "positive; got (tol=%r)" % self.tol)

-        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,
+        X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,
                         order="C")
        check_classification_targets(y)
        self.classes_ = np.unique(y)
10 changes: 10 additions & 0 deletions sklearn/linear_model/tests/test_logistic.py
@@ -11,6 +11,7 @@
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_true
from sklearn.utils.testing import assert_warns
+from sklearn.utils.testing import assert_warns_message
from sklearn.utils.testing import raises
from sklearn.utils.testing import ignore_warnings
from sklearn.utils.testing import assert_raise_message
@@ -875,3 +876,12 @@ def test_warm_start():
            assert_greater(2.0, cum_diff, msg)
        else:
            assert_greater(cum_diff, 2.0, msg)


+def test_multinomial_logistic_regression_with_classweight_auto():
+    X, y = make_classification(n_classes=5, n_informative=5, random_state=42)
+    model = LogisticRegression(multi_class='multinomial',
+                               class_weight='auto', solver='lbfgs')
+    assert_warns_message(DeprecationWarning,
+                         "class_weight='auto' heuristic is deprecated",
+                         model.fit, X, y)
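
Usage note (my addition, not part of the diff): the test above exercises the deprecated 'auto' spelling on purpose, since the point of the fix is that it must keep working while warning. On the scikit-learn versions this PR targets, the equivalent non-deprecated configuration would presumably be:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Same data as in the test; 'balanced' is the documented replacement for 'auto'.
X, y = make_classification(n_classes=5, n_informative=5, random_state=42)
model = LogisticRegression(multi_class='multinomial',
                           class_weight='balanced', solver='lbfgs')
model.fit(X, y)
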
6 changes: 0 additions & 6 deletions sklearn/tests/test_common.py
@@ -113,12 +113,6 @@ def test_class_weight_balanced_linear_classifiers():
and issubclass(clazz, LinearClassifierMixin)]

    for name, Classifier in linear_classifiers:
-        if name == "LogisticRegressionCV":
-            # Contrary to RidgeClassifierCV, LogisticRegressionCV use actual
-            # CV folds and fit a model for each CV iteration before averaging
-            # the coef. Therefore it is expected to not behave exactly as the
-            # other linear model.
-            continue
        yield check_class_weight_balanced_linear_classifier, name, Classifier

