scikit-learn · lucyleeow · Jul 14, 2025 · Jun 18, 2025 · Jun 18, 2025 · Jul 2, 2025
diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/31584.fix.rst b/doc/whats_new/upcoming_changes/sklearn.utils/31584.fix.rst
@@ -0,0 +1,4 @@
+- Fixed a spurious warning (about the number of unique classes being
+  greater than 50% of the number of samples) that could occur when
+  passing `classes` to :func:`utils.multiclass.type_of_target`.
+  By :user:`Sascha D. Krauss <saskra>`.
diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
@@ -414,7 +414,7 @@ def _raise_or_return():
     if issparse(first_row_or_val):
         first_row_or_val = first_row_or_val.data
     classes = cached_unique(y)
-    if y.shape[0] > 20 and classes.shape[0] > round(0.5 * y.shape[0]):
+    if y.shape[0] > 20 and y.shape[0] > classes.shape[0] > round(0.5 * y.shape[0]):
         # Only raise the warning when we have at least 20 samples.
         warnings.warn(
             "The number of unique classes is greater than 50% of the number "

diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
@@ -302,7 +302,11 @@ def test_type_of_target_too_many_unique_classes():
     We need to check that we don't raise if we have less than 20 samples.
     """
 
-    y = np.arange(25)
+    # Create array of unique labels, except '0', which appears twice.
+    # This does raise a warning.
+    # Note warning would not be raised if we passed only unique
+    # labels, which happens when `type_of_target` is passed `classes_`.
+    y = np.hstack((np.arange(20), [0]))
     msg = r"The number of unique classes is greater than 50% of the number of samples."
     with pytest.warns(UserWarning, match=msg):
         type_of_target(y)
@@ -313,6 +317,14 @@ def test_type_of_target_too_many_unique_classes():
         warnings.simplefilter("error")
         type_of_target(y)
 
+    # More than 20 samples but only unique classes, simulating passing
+    # `classes_` to `type_of_target` (when number of classes is large).
+    # No warning should be raised, so turn any UserWarning into an error.
+    y = np.arange(25)
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", UserWarning)
+        type_of_target(y)
+
 
 def test_unique_labels_non_specific():
     # Test unique_labels with a variety of collected examples

diff --git a/sklearn/utils/tests/test_response.py b/sklearn/utils/tests/test_response.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 import pytest
 
@@ -369,3 +371,24 @@ def test_get_response_values_multilabel_indicator(response_method):
         assert (y_pred > 1).sum() > 0
     else:  # response_method == "predict"
         assert np.logical_or(y_pred == 0, y_pred == 1).all()
+
+
+def test_response_values_type_of_target_on_classes_no_warning():
+    """
+    Ensure `_get_response_values` doesn't raise a spurious warning.
+
+    "The number of unique classes is greater than 50% of the number of samples"
+    warning should not be raised when calling `type_of_target(classes_)`.
+
+    Non-regression test for issue #31583.
+    """
+    X = np.random.RandomState(0).randn(120, 3)
+    # 30 classes, less than 50% of number of samples
+    y = np.repeat(np.arange(30), 4)
+
+    clf = LogisticRegression().fit(X, y)
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", UserWarning)
+
+        _get_response_values(clf, X, response_method="predict_proba")