Skip to content

FIX renormalization of y_pred inside log_loss #25299

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ Changelog
- |Fix| :func:`metric.manhattan_distances` now supports readonly sparse datasets.
:pr:`25432` by :user:`Julien Jerphanion <jjerphan>`.

- |Fix| :func:`log_loss` raises a warning if the values of the parameter `y_pred` are
not normalized, instead of actually normalizing them in the metric. Starting from
1.5 this will raise an error. :pr:`25299` by :user:`Omar Salman <OmarManzoor>`.

- |API| The `eps` parameter of the :func:`log_loss` has been deprecated and will be
removed in 1.5. :pr:`25299` by :user:`Omar Salman <OmarManzoor>`.

:mod:`sklearn.naive_bayes`
..........................

Expand Down
20 changes: 19 additions & 1 deletion sklearn/metrics/_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2622,6 +2622,9 @@ def log_loss(
The default value changed from `1e-15` to `"auto"` that is
equivalent to `np.finfo(y_pred.dtype).eps`.

.. deprecated:: 1.3
`eps` is deprecated in 1.3 and will be removed in 1.5.

normalize : bool, default=True
If true, return the mean loss per sample.
Otherwise, return the sum of the per-sample losses.
Expand Down Expand Up @@ -2660,7 +2663,16 @@ def log_loss(
y_pred = check_array(
y_pred, ensure_2d=False, dtype=[np.float64, np.float32, np.float16]
)
eps = np.finfo(y_pred.dtype).eps if eps == "auto" else eps
if eps == "auto":
eps = np.finfo(y_pred.dtype).eps
else:
# TODO: Remove user defined eps in 1.5
warnings.warn(
"Setting the eps parameter is deprecated and will "
"be removed in 1.5. Instead eps will always have "
"a default value of `np.finfo(y_pred.dtype).eps`.",
FutureWarning,
)

check_consistent_length(y_pred, y_true, sample_weight)
lb = LabelBinarizer()
Expand Down Expand Up @@ -2723,6 +2735,12 @@ def log_loss(

# Renormalize
y_pred_sum = y_pred.sum(axis=1)
if not np.isclose(y_pred_sum, 1, rtol=1e-15, atol=5 * eps).all():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my education: why is it necessary to set the rtol and atol parameters ourselves instead of using the default values? The docs of isclose say that you should think about the values, but also that the default of atol is suitable for comparing values of order one.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need it to handle the eps parameter. After the deprecation, I guess we can indeed remove it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes that makes sense. Thanks!

warnings.warn(
"The y_pred values do not sum to one. Starting from 1.5 this "
"will result in an error.",
UserWarning,
)
y_pred = y_pred / y_pred_sum[:, np.newaxis]
loss = -xlogy(transformed_labels, y_pred).sum(axis=1)

Expand Down
33 changes: 23 additions & 10 deletions sklearn/metrics/tests/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2477,19 +2477,29 @@ def test_log_loss():
loss = log_loss(y_true, y_pred, normalize=False)
assert_almost_equal(loss, 0.6904911 * 6, decimal=6)

user_warning_msg = "y_pred values do not sum to one"
# check eps and handling of absolute zero and one probabilities
y_pred = np.asarray(y_pred) > 0.5
loss = log_loss(y_true, y_pred, normalize=True, eps=0.1)
assert_almost_equal(loss, log_loss(y_true, np.clip(y_pred, 0.1, 0.9)))
with pytest.warns(FutureWarning):
loss = log_loss(y_true, y_pred, normalize=True, eps=0.1)
with pytest.warns(UserWarning, match=user_warning_msg):
assert_almost_equal(loss, log_loss(y_true, np.clip(y_pred, 0.1, 0.9)))

# binary case: check correct boundary values for eps = 0
assert log_loss([0, 1], [0, 1], eps=0) == 0
assert log_loss([0, 1], [0, 0], eps=0) == np.inf
assert log_loss([0, 1], [1, 1], eps=0) == np.inf
with pytest.warns(FutureWarning):
assert log_loss([0, 1], [0, 1], eps=0) == 0
with pytest.warns(FutureWarning):
assert log_loss([0, 1], [0, 0], eps=0) == np.inf
with pytest.warns(FutureWarning):
assert log_loss([0, 1], [1, 1], eps=0) == np.inf

# multiclass case: check correct boundary values for eps = 0
assert log_loss([0, 1, 2], [[1, 0, 0], [0, 1, 0], [0, 0, 1]], eps=0) == 0
assert log_loss([0, 1, 2], [[0, 0.5, 0.5], [0, 1, 0], [0, 0, 1]], eps=0) == np.inf
with pytest.warns(FutureWarning):
assert log_loss([0, 1, 2], [[1, 0, 0], [0, 1, 0], [0, 0, 1]], eps=0) == 0
with pytest.warns(FutureWarning):
assert (
log_loss([0, 1, 2], [[0, 0.5, 0.5], [0, 1, 0], [0, 0, 1]], eps=0) == np.inf
)

# raise error if number of classes are not equal.
y_true = [1, 0, 2]
Expand All @@ -2500,7 +2510,8 @@ def test_log_loss():
# case when y_true is a string array object
y_true = ["ham", "spam", "spam", "ham"]
y_pred = [[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]]
loss = log_loss(y_true, y_pred)
with pytest.warns(UserWarning, match=user_warning_msg):
loss = log_loss(y_true, y_pred)
assert_almost_equal(loss, 1.0383217, decimal=6)

# test labels option
Expand Down Expand Up @@ -2528,7 +2539,8 @@ def test_log_loss():
# ensure labels work when len(np.unique(y_true)) != y_pred.shape[1]
y_true = [1, 2, 2]
y_score2 = [[0.2, 0.7, 0.3], [0.6, 0.5, 0.3], [0.3, 0.9, 0.1]]
loss = log_loss(y_true, y_score2, labels=[1, 2, 3])
with pytest.warns(UserWarning, match=user_warning_msg):
loss = log_loss(y_true, y_score2, labels=[1, 2, 3])
assert_almost_equal(loss, 1.0630345, decimal=6)


Expand Down Expand Up @@ -2568,7 +2580,8 @@ def test_log_loss_pandas_input():
for TrueInputType, PredInputType in types:
# y_pred dataframe, y_true series
y_true, y_pred = TrueInputType(y_tr), PredInputType(y_pr)
loss = log_loss(y_true, y_pred)
with pytest.warns(UserWarning, match="y_pred values do not sum to one"):
loss = log_loss(y_true, y_pred)
assert_almost_equal(loss, 1.0383217, decimal=6)


Expand Down