ENH allow to pass str or scorer to make_scorer #18141

Closed
20 changes: 20 additions & 0 deletions doc/modules/model_evaluation.rst
@@ -202,6 +202,26 @@ Here is an example of building custom scorers, and of using the
>>> score(clf, X, y)
-0.69...

You can also use a predefined metric, shown in the table above, by passing its
name as a string. In that case, the parameters `greater_is_better`,
`needs_proba`, and `needs_threshold` are not required; only the additional
parameters of the scoring function, if any, need to be given::

>>> precision_scorer = make_scorer("precision", average="micro")
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(
... n_classes=3, n_informative=3, random_state=0
... )
>>> clf.fit(X, y)
DummyClassifier(random_state=0, strategy='most_frequent')
>>> precision_scorer(clf, X, y)
0.35...

Similarly, you can pass an existing scorer to create a new scorer::

>>> new_scorer = make_scorer(precision_scorer, average="macro")
>>> new_scorer(clf, X, y)
0.11...
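
The keyword arguments given to :func:`~sklearn.metrics.make_scorer` become the
keyword arguments of the new scorer; they are not merged with those of the
base scorer. As an illustrative sketch (reusing ``clf``, ``X``, and ``y`` from
above), building the new scorer from the metric name or from the corresponding
scorer object gives the same result::

>>> from sklearn.metrics import get_scorer
>>> scorer_from_str = make_scorer("precision", average="macro")
>>> scorer_from_scorer = make_scorer(get_scorer("precision"), average="macro")
>>> scorer_from_str(clf, X, y) == scorer_from_scorer(clf, X, y)
True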

.. _diy_scoring:

4 changes: 4 additions & 0 deletions doc/whats_new/v0.24.rst
@@ -280,6 +280,10 @@ Changelog
class to be used when computing the roc auc statistics.
:pr:`17651` by :user:`Clara Matos <claramatos>`.

- |Enhancement| :func:`metrics.make_scorer` now accepts a string or an
  existing scorer to create a new scorer.
  :pr:`18141` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.model_selection`
..............................

69 changes: 56 additions & 13 deletions sklearn/metrics/_scorer.py
@@ -18,9 +18,9 @@
# Arnaud Joly <arnaud.v.joly@gmail.com>
# License: Simplified BSD

from collections import Counter
from collections.abc import Iterable
from functools import partial
from collections import Counter

import numpy as np

@@ -507,9 +507,21 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False,

Parameters
----------
score_func : callable,
Score function (or loss function) with signature
``score_func(y, y_pred, **kwargs)``.
score_func : str or callable
This parameter can be:

* a string (see the model evaluation documentation). The parameters
`greater_is_better`, `needs_proba`, and `needs_threshold` will be
ignored and inferred from the base scorer. However, you can pass any
additional parameters required by the scoring function as `**kwargs`;
* a scorer callable object originally constructed with
:func:`make_scorer` or returned by :func:`get_scorer`. As with a string,
`greater_is_better`, `needs_proba`, and `needs_threshold` will be ignored
and inferred from the base scorer, and any additional parameters required
by the scoring function can be passed as `**kwargs`;
* a callable score function (or loss function) with signature
``score_func(y, y_pred, **kwargs)``.

greater_is_better : bool, default=True
Whether score_func is a score function (default), meaning high is good,
@@ -546,6 +558,8 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False,

Examples
--------
You can create a scorer from a callable score function:

>>> from sklearn.metrics import fbeta_score, make_scorer
>>> ftwo_scorer = make_scorer(fbeta_score, beta=2)
>>> ftwo_scorer
@@ -555,6 +569,23 @@ def make_scorer(score_func, *, greater_is_better=True, needs_proba=False,
>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},
... scoring=ftwo_scorer)

Alternatively, you can pass the name of a predefined metric as a string, in
which case the other parameters of `make_scorer` do not need to be given:

>>> from sklearn.datasets import load_breast_cancer
>>> X, y = load_breast_cancer(return_X_y=True)
>>> roc_auc_scorer = make_scorer("roc_auc")
>>> clf = LinearSVC(random_state=0).fit(X, y)
>>> roc_auc_scorer(clf, X, y)
0.98...

Similarly, you can use a scorer obtained with :func:`get_scorer`:

>>> from sklearn.metrics import get_scorer
>>> roc_auc_scorer = get_scorer("roc_auc")
>>> roc_auc_scorer(clf, X, y)
0.98...
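
For a metric that operates on probabilities, a minimal sketch (the scorer is
only constructed here, since `LinearSVC` does not implement `predict_proba`):

>>> from sklearn.metrics import log_loss
>>> neg_log_loss_scorer = make_scorer(log_loss, greater_is_better=False,
...                                   needs_proba=True)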

Notes
-----
If `needs_proba=False` and `needs_threshold=False`, the score
@@ -565,16 +596,28 @@
`needs_threshold=True`, the score function is supposed to accept the
output of :term:`decision_function`.
"""
sign = 1 if greater_is_better else -1
if needs_proba and needs_threshold:
raise ValueError("Set either needs_proba or needs_threshold to True,"
" but not both.")
if needs_proba:
cls = _ProbaScorer
elif needs_threshold:
cls = _ThresholdScorer
if isinstance(score_func, (str, _BaseScorer)):
base_scorer = (
get_scorer(score_func)
if isinstance(score_func, str)
else score_func
)
cls = base_scorer.__class__
score_func = base_scorer._score_func
sign = base_scorer._sign
else:
cls = _PredictScorer
sign = 1 if greater_is_better else -1
if needs_proba and needs_threshold:
raise ValueError(
"Set either needs_proba or needs_threshold to True, but not "
"both."
)
if needs_proba:
cls = _ProbaScorer
elif needs_threshold:
cls = _ThresholdScorer
else:
cls = _PredictScorer
return cls(score_func, sign, kwargs)
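
To make the new dispatch concrete, here is a minimal, self-contained sketch
(not part of the diff; it assumes this branch is installed, and the estimator
and dataset are arbitrary choices) in which a plain score function, a metric
name, and an existing scorer all yield callables with the
`scorer(estimator, X, y)` signature:

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import fbeta_score, get_scorer, make_scorer

X, y = load_breast_cancer(return_X_y=True)
clf = LogisticRegression(max_iter=10_000).fit(X, y)

scorers = {
    "callable": make_scorer(fbeta_score, beta=2),   # pre-existing behaviour
    "string": make_scorer("roc_auc"),               # new: metric name
    "scorer": make_scorer(get_scorer("roc_auc")),   # new: existing scorer
}
for name, scorer in scorers.items():
    # each wrapper is called on the estimator and raw data, not on predictions
    print(name, scorer(clf, X, y))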


31 changes: 31 additions & 0 deletions sklearn/metrics/tests/test_score_objects.py
@@ -747,3 +747,34 @@ def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
msg = "'Perceptron' object has no attribute 'predict_proba'"
with pytest.raises(AttributeError, match=msg):
scorer(lr, X, y)


@pytest.mark.parametrize(
"scoring",
["roc_auc", get_scorer("roc_auc")],
ids=["str", "scorer_instance"],
)
def test_make_scorer_from_str_or_base_scorer(scoring):
# check that we can create a scorer from a string or a previous scorer
base_scorer = get_scorer(scoring) if isinstance(scoring, str) else scoring
scorer = make_scorer(scoring)

# check that we have a different object but with the same parameter values
assert scorer is not base_scorer
assert scorer._score_func == base_scorer._score_func
assert scorer._sign == base_scorer._sign
assert scorer._kwargs == base_scorer._kwargs

# check that the parameters of `make_scorer` have no effect when passing a
# string or an existing scorer. The following would otherwise have raised an
# error because a scorer cannot be a _ProbaScorer and a _ThresholdScorer at
# the same time.
scorer = make_scorer(
scoring,
greater_is_better=False,
needs_threshold=True,
needs_proba=True,
)

# check that we can overwrite the scoring function parameters
scorer = make_scorer(scoring, multi_class="ovo")
assert scorer._kwargs == {"multi_class": "ovo"}
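

A complementary check, hypothetical and not part of this PR, could also assert
that the wrapper keeps the scorer class and sign of the registered scorer:

@pytest.mark.parametrize("scoring", ["accuracy", "neg_log_loss", "roc_auc"])
def test_make_scorer_preserves_scorer_type(scoring):
    # hypothetical complementary check: the scorer built from a metric name
    # keeps the class (predict / proba / threshold based) and the sign of
    # the registered scorer
    base_scorer = get_scorer(scoring)
    scorer = make_scorer(scoring)
    assert type(scorer) is type(base_scorer)
    assert scorer._sign == base_scorer._sign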