scikit-learn · glemaitre · Mar 24, 2023 · Feb 23, 2023 · Feb 23, 2023 · Feb 23, 2023
diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst
@@ -301,6 +301,10 @@ Changelog
   both return `np.nan`.
   :pr:`25531` by :user:`Marc Torrellas Socastro <marctorsoc>`.
 
+- |Fix| :func:`metrics.ndcg_score` now gives a meaningful error message for input of
+  length 1.
+  :pr:`25672` by :user:`Lene Preuss <lene>` and :user:`Wei-Chun Chu <wcchu>`.
+
 - |Enhancement| :class:`metrics.silhouette_samples` nows accepts a sparse
   matrix of pairwise distances between samples, or a feature array.
   :pr:`18723` by :user:`Sahil Gupta <sahilgupta2105>` and

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
@@ -1733,10 +1733,16 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False
     if y_true.min() < 0:
         # TODO(1.4): Replace warning w/ ValueError
         warnings.warn(
-            "ndcg_score should not be used on negative y_true values. ndcg_score will"
-            " raise a ValueError on negative y_true values starting from version 1.4.",
+            "ndcg_score should not be used on negative y_true values. ndcg_score"
+            " will raise a ValueError on negative y_true values starting from"
+            " version 1.4.",
             FutureWarning,
         )
+    if y_true.ndim > 1 and y_true.shape[1] <= 1:
+        raise ValueError(
+            "Computing NDCG is only meaningful when there is more than 1 document. "
+            f"Got {y_true.shape[1]} instead."
+        )
     _check_dcg_target_type(y_true)
     gain = _ndcg_sample_scores(y_true, y_score, k=k, ignore_ties=ignore_ties)
     return np.average(gain, weights=sample_weight)

diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
@@ -1535,7 +1535,6 @@ def test_lrap_error_raised():
 @pytest.mark.parametrize("n_classes", (2, 5, 10))
 @pytest.mark.parametrize("random_state", range(1))
 def test_alternative_lrap_implementation(n_samples, n_classes, random_state):
-
     check_alternative_lrap_implementation(
         label_ranking_average_precision_score, n_classes, n_samples, random_state
     )
@@ -1835,6 +1834,17 @@ def test_ndcg_toy_examples(ignore_ties):
     assert ndcg_score(y_true, y_score, ignore_ties=ignore_ties) == pytest.approx(1.0)
 
 
+def test_ndcg_error_single_document():
+    """Check that `ndcg_score` raises an informative `ValueError` when each
+    sample of `y_true` contains a single document."""
+    err_msg = (
+        "Computing NDCG is only meaningful when there is more than 1 document. "
+        "Got 1 instead."
+    )
+    with pytest.raises(ValueError, match=err_msg):
+        ndcg_score([[1]], [[1]])  # one sample, one document: shape (1, 1)
+
+
 def test_ndcg_score():
     _, y_true = make_multilabel_classification(random_state=0, n_classes=10)
     y_score = -y_true + 1