From 1fbda386e8e647ff1e59067c2bd73ea2dbeb172f Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:25:48 +0100
Subject: [PATCH 01/16] FEAT add metadata routing support to scorers

---
 sklearn/metrics/_scorer.py                  | 159 ++++++++++++++------
 sklearn/metrics/tests/test_score_objects.py |  22 ++-
 sklearn/utils/metadata_routing.py           |   1 +
 3 files changed, 136 insertions(+), 46 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 1e8e330d1af81..4418f3637be8b 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -18,6 +18,7 @@
 #          Arnaud Joly
 # License: Simplified BSD
 
+import copy
 from collections.abc import Iterable
 from functools import partial
 from collections import Counter
@@ -61,6 +62,10 @@
 from ..utils.multiclass import type_of_target
 from ..base import is_regressor
 
+from ..utils.metadata_routing import _MetadataRequester
+from ..utils.metadata_routing import MetadataRequest
+from ..utils.metadata_routing import MetadataRouter
+from ..utils.metadata_routing import process_routing
 
 
 def _cached_call(cache, estimator, method, *args, **kwargs):
@@ -99,11 +104,15 @@ def __call__(self, estimator, *args, **kwargs):
         cache = {} if self._use_cache(estimator) else None
         cached_call = partial(_cached_call, cache)
 
+        params = process_routing(self, "score", kwargs)
+
         for name, scorer in self._scorers.items():
             if isinstance(scorer, _BaseScorer):
-                score = scorer._score(cached_call, estimator, *args, **kwargs)
+                score = scorer._score(
+                    cached_call, estimator, *args, **params.get(name).score
+                )
             else:
-                score = scorer(estimator, *args, **kwargs)
+                score = scorer(estimator, *args, **params.get(name).score)
             scores[name] = score
         return scores
 
@@ -138,8 +147,23 @@ def _use_cache(self, estimator):
                 return True
         return False
 
+    def get_metadata_routing(self):
+        """Get metadata routing of this object.
+
+        Please check :ref:`User Guide <metadata_routing>` on how the routing
+        mechanism works.
+
+        Returns
+        -------
+        routing : dict
+            A dict representing a MetadataRouter.
+        """
+        return MetadataRouter(owner=self.__class__.__name__).add(
+            **self._scorers, method_mapping="score"
+        )
+
 
-class _BaseScorer:
+class _BaseScorer(_MetadataRequester):
     def __init__(self, score_func, sign, kwargs):
         self._kwargs = kwargs
         self._score_func = score_func
@@ -191,7 +215,7 @@ def __repr__(self):
             kwargs_string,
         )
 
-    def __call__(self, estimator, X, y_true, sample_weight=None):
+    def __call__(self, estimator, X, y_true, **kwargs):
         """Evaluate predicted target values for X relative to y_true.
 
         Parameters
@@ -206,29 +230,45 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like of shape (n_samples,), default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
        -------
         score : float
             Score function applied to prediction of estimator on X.
         """
-        return self._score(
-            partial(_cached_call, None),
-            estimator,
-            X,
-            y_true,
-            sample_weight=sample_weight,
-        )
+        return self._score(partial(_cached_call, None), estimator, X, y_true, **kwargs)
 
     def _factory_args(self):
         """Return non-default make_scorer arguments for repr."""
         return ""
 
+    def set_score_request(self, **kwargs):
+        """Set requested parameters by the scorer.
+
+        Note that this method returns a new instance of the scorer, and does
+        **not** change the original scorer object.
+
+        .. versionadded:: 1.1
+
+        Parameters
+        ----------
+        kwargs : dict
+            Arguments should be of the form param_name={True, False, None, str}.
+            The value can also be of the form RequestType
+        """
+        res = copy.deepcopy(self)
+        res._metadata_request = MetadataRequest(owner=self.__class__.__name__)
+        for param, alias in kwargs.items():
+            res._metadata_request.score.add_request(param=param, alias=alias)
+        return res
+
 
 class _PredictScorer(_BaseScorer):
-    def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
+    def _score(self, method_caller, estimator, X, y_true, **kwargs):
         """Evaluate predicted target values for X relative to y_true.
 
         Parameters
@@ -247,8 +287,10 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like of shape (n_samples,), default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
         -------
@@ -257,16 +299,13 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
         """
 
         y_pred = method_caller(estimator, "predict", X)
-        if sample_weight is not None:
-            return self._sign * self._score_func(
-                y_true, y_pred, sample_weight=sample_weight, **self._kwargs
-            )
-        else:
-            return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
+        scoring_kwargs = copy.deepcopy(self._kwargs)
+        scoring_kwargs.update(kwargs)
+        return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
 
 
 class _ProbaScorer(_BaseScorer):
-    def _score(self, method_caller, clf, X, y, sample_weight=None):
+    def _score(self, method_caller, clf, X, y, **kwargs):
         """Evaluate predicted probabilities for X relative to y_true.
 
         Parameters
@@ -286,8 +325,10 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
             Gold standard target values for X. These must be class labels,
             not probabilities.
 
-        sample_weight : array-like, default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
         -------
@@ -302,19 +343,23 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
             # problem: (when only 2 class are given to `y_true` during scoring)
             # Thus, we need to check for the shape of `y_pred`.
             y_pred = self._select_proba_binary(y_pred, clf.classes_)
-        if sample_weight is not None:
-            return self._sign * self._score_func(
-                y, y_pred, sample_weight=sample_weight, **self._kwargs
-            )
-        else:
-            return self._sign * self._score_func(y, y_pred, **self._kwargs)
+
+        scoring_kwargs = copy.deepcopy(self._kwargs)
+        scoring_kwargs.update(kwargs)
+        # this is for backward compatibility to avoid passing sample_weight
+        # to the scorer if it's None
+        # probably remove in v1.3
+        if scoring_kwargs.get("sample_weight", -1) is None:
+            del scoring_kwargs["sample_weight"]
+
+        return self._sign * self._score_func(y, y_pred, **scoring_kwargs)
 
     def _factory_args(self):
         return ", needs_proba=True"
 
 
 class _ThresholdScorer(_BaseScorer):
-    def _score(self, method_caller, clf, X, y, sample_weight=None):
+    def _score(self, method_caller, clf, X, y, **kwargs):
         """Evaluate decision function output for X relative to y_true.
 
         Parameters
@@ -336,8 +381,10 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
             Gold standard target values for X. These must be class labels,
             not decision function values.
 
-        sample_weight : array-like, default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
         -------
@@ -374,12 +421,14 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         elif isinstance(y_pred, list):
             y_pred = np.vstack([p[:, -1] for p in y_pred]).T
 
-        if sample_weight is not None:
-            return self._sign * self._score_func(
-                y, y_pred, sample_weight=sample_weight, **self._kwargs
-            )
-        else:
-            return self._sign * self._score_func(y, y_pred, **self._kwargs)
+        scoring_kwargs = copy.deepcopy(self._kwargs)
+        scoring_kwargs.update(kwargs)
+        # this is for backward compatibility to avoid passing sample_weight
+        # to the scorer if it's None
+        # probably remove in v1.3
+        if scoring_kwargs.get("sample_weight", -1) is None:
+            del scoring_kwargs["sample_weight"]
+        return self._sign * self._score_func(y, y_pred, **scoring_kwargs)
 
     def _factory_args(self):
         return ", needs_threshold=True"
@@ -414,9 +463,29 @@ def get_scorer(scoring):
     return scorer
 
 
-def _passthrough_scorer(estimator, *args, **kwargs):
-    """Function that wraps estimator.score"""
-    return estimator.score(*args, **kwargs)
+class _passthrough_scorer:
+    def __init__(self, estimator):
+        self._estimator = estimator
+
+    def __call__(self, estimator, *args, **kwargs):
+        """Method that wraps estimator.score"""
+        return estimator.score(*args, **kwargs)
+
+    def get_metadata_request(self):
+        """Get requested data properties.
+
+        .. versionadded:: 1.1
+
+        Returns
+        -------
+        request : dict
+            A dict of dict of str->value. The key to the first dict is the name
+            of the method, and the key to the second dict is the name of the
+            argument requested by the method.
+        """
+        return MetadataRouter(owner=self.__class__.__name__).add(
+            estimator=self._estimator, method_mapping="score"
+        )
 
 
 def check_scoring(estimator, scoring=None, *, allow_none=False):
@@ -471,7 +540,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
         return get_scorer(scoring)
     elif scoring is None:
         if hasattr(estimator, "score"):
-            return _passthrough_scorer
+            return _passthrough_scorer(estimator)
         elif allow_none:
             return None
         else:
@@ -604,7 +673,7 @@ def make_scorer(
     ----------
     score_func : callable
         Score function (or loss function) with signature
-        `score_func(y, y_pred, **kwargs)`.
+        ``score_func(y, y_pred, **kwargs)``.
 
     greater_is_better : bool, default=True
         Whether `score_func` is a score function (default), meaning high is
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 3b3caceb9970a..7b21fac04c02c 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -236,7 +236,7 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator):
     estimator = EstimatorWithFitAndScore()
     estimator.fit([[1]], [1])
     scorer = scoring_validator(estimator)
-    assert scorer is _passthrough_scorer
+    assert isinstance(scorer, _passthrough_scorer)
     assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)
 
     estimator = EstimatorWithFitAndPredict()
@@ -629,11 +629,14 @@ def test_classification_scorer_sample_weight():
         else:
             target = y_test
         try:
+            scorer = scorer.set_score_request(sample_weight=True)
             weighted = scorer(
                 estimator[name], X_test, target, sample_weight=sample_weight
             )
             ignored = scorer(estimator[name], X_test[10:], target[10:])
             unweighted = scorer(estimator[name], X_test, target)
+            # this should not raise. sample_weight should be ignored if None.
+            _ = scorer(estimator[name], X_test[:10], target[:10], sample_weight=None)
             assert weighted != unweighted, (
                 f"scorer {name} behaves identically when called with "
                 f"sample weights: {weighted} vs {unweighted}"
@@ -677,6 +680,7 @@ def test_regression_scorer_sample_weight():
             # skip classification scorers
             continue
         try:
+            scorer = scorer.set_score_request(sample_weight=True)
             weighted = scorer(reg, X_test, y_test, sample_weight=sample_weight)
             ignored = scorer(reg, X_test[11:], y_test[11:])
             unweighted = scorer(reg, X_test, y_test)
@@ -1140,3 +1144,19 @@ def test_scorer_no_op_multiclass_select_proba():
         labels=lr.classes_,
     )
     scorer(lr, X_test, y_test)
+
+
+@pytest.mark.parametrize("name, scorer", SCORERS.items())
+def test_scorer_metadata_request(name, scorer):
+    assert hasattr(scorer, "set_score_request")
+    assert hasattr(scorer, "get_metadata_routing")
+
+    weighted_scorer = scorer.set_score_request(sample_weight=True)
+    # set_score_request shouldn't mutate the instance
+    assert weighted_scorer is not scorer
+
+    assert str(scorer.get_metadata_routing()) == "{}"
+    assert (
+        str(weighted_scorer.get_metadata_routing())
+        == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
+    )
diff --git a/sklearn/utils/metadata_routing.py b/sklearn/utils/metadata_routing.py
index 1b3ba6e33b1f7..89ba5b5a9253c 100644
--- a/sklearn/utils/metadata_routing.py
+++ b/sklearn/utils/metadata_routing.py
@@ -14,3 +14,4 @@
 from ._metadata_requests import MetadataRequest  # noqa
 from ._metadata_requests import MethodMapping  # noqa
 from ._metadata_requests import process_routing  # noqa
+from ._metadata_requests import _MetadataRequester  # noqa

From 51baf9d277bc0498d104880e9a54cf3c598d1eef Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:29:01 +0100
Subject: [PATCH 02/16] clarify doc on not mutating scorers

---
 doc/metadata_routing.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst
index c6641ada487db..0e20f73966bf7 100644
--- a/doc/metadata_routing.rst
+++ b/doc/metadata_routing.rst
@@ -46,8 +46,10 @@ Weighted scoring and fitting
 ----------------------------
 
 Here ``GroupKFold`` requests ``groups`` by default. However, we need to
-explicitly request weights in ``make_scorer`` and for ``LogisticRegressionCV``.
-Both of these *consumers* know how to use metadata called ``"sample_weight"``::
+explicitly request weights for our scorer and for ``LogisticRegressionCV``.
+Note that ``set_score_request`` on a scorer does not mutate the instance and
+instead returns a new instance with the metadata request attached to it. Both
+of these *consumers* know how to use metadata called ``"sample_weight"``::
 
     >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
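
The request mechanism introduced by the two patches above can be exercised
directly. The following is a minimal sketch (not part of the series) that
assumes a scikit-learn build with these patches applied and uses only calls
that appear in the diffs above::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, make_scorer

    X, y = make_classification(random_state=0)
    lr = LogisticRegression().fit(X, y)
    sample_weight = np.abs(np.random.RandomState(0).randn(len(y)))

    # At this point in the series, set_score_request returns a *new* scorer
    # instance and leaves the original untouched.
    weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True)
    print(weighted_acc.get_metadata_routing())  # records the 'sample_weight' request
    # The scorer forwards the metadata to accuracy_score as a keyword argument.
    print(weighted_acc(lr, X, y, sample_weight=sample_weight))
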
From 78638c40a0d4161c7db91131fbe1ae1bbd386b67 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:40:44 +0100
Subject: [PATCH 03/16] improve tests

---
 sklearn/metrics/tests/test_score_objects.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 7b21fac04c02c..79e49bc68b4d1 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -15,6 +15,8 @@
 from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.metadata_routing import MetadataRouter
+from sklearn.utils.metadata_routing import process_routing
 
 from sklearn.base import BaseEstimator
 from sklearn.metrics import (
@@ -1160,3 +1162,18 @@ def test_scorer_metadata_request(name, scorer):
         str(weighted_scorer.get_metadata_routing())
         == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
     )
+
+    # make sure putting the scorer in a router doesn't request anything
+    router = MetadataRouter(owner="test").add(scorer=scorer, method_mapping="score")
+    with pytest.raises(TypeError, match="got unexpected argument"):
+        router.validate_metadata(params={"sample_weight": 1}, method="score")
+    routed_params = router.route_params(params={"sample_weight": 1}, caller="score")
+    assert not routed_params.scorer.score
+
+    # make sure putting weighted_scorer in a router requests sample_weight
+    router = MetadataRouter(owner="test").add(
+        scorer=weighted_scorer, method_mapping="score"
+    )
+    router.validate_metadata(params={"sample_weight": 1}, method="score")
+    routed_params = router.route_params(params={"sample_weight": 1}, caller="score")
+    assert list(routed_params.scorer.score.keys()) == ["sample_weight"]

From ea317bc27e5bab252dd8304ddea1f9b16fb0d789 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:44:10 +0100
Subject: [PATCH 04/16] remove unused import

---
 sklearn/metrics/tests/test_score_objects.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 79e49bc68b4d1..02ca8352d477b 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -16,7 +16,6 @@
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import ignore_warnings
 from sklearn.utils.metadata_routing import MetadataRouter
-from sklearn.utils.metadata_routing import process_routing
 
 from sklearn.base import BaseEstimator
 from sklearn.metrics import (

From 6c6c2f1044e74283a70c1fd720baa09effad5e46 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Fri, 11 Mar 2022 10:49:00 +0100
Subject: [PATCH 05/16] fix tests

---
 sklearn/inspection/_permutation_importance.py | 2 +-
 sklearn/model_selection/tests/test_search.py  | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py
index 7ca586efe8630..70b9c89c62126 100644
--- a/sklearn/inspection/_permutation_importance.py
+++ b/sklearn/inspection/_permutation_importance.py
@@ -15,7 +15,7 @@
 
 def _weights_scorer(scorer, estimator, X, y, sample_weight):
     if sample_weight is not None:
-        return scorer(estimator, X, y, sample_weight)
+        return scorer(estimator, X, y, sample_weight=sample_weight)
     return scorer(estimator, X, y)
 
 
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index a4dcc201c0bb1..4611669b2b974 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1902,7 +1902,13 @@ def _run_search(self, evaluate):
             attr[0].islower()
             and attr[-1:] == "_"
             and attr
-            not in {"cv_results_", "best_estimator_", "refit_time_", "classes_"}
+            not in {
+                "cv_results_",
+                "best_estimator_",
+                "refit_time_",
+                "classes_",
+                "scorer_",
+            }
         ):
             assert getattr(gscv, attr) == getattr(mycv, attr), (
                 "Attribute %s not equal" % attr
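
PATCH 05's one-word-looking change in ``_weights_scorer`` matters because
PATCH 01 changed the scorer signature from ``(estimator, X, y_true,
sample_weight=None)`` to ``(estimator, X, y_true, **kwargs)``: a fourth
positional argument no longer binds to ``sample_weight``. A standalone sketch
of the failure mode (the ``scorer`` function below is a hypothetical stand-in
for the new signature)::

    def scorer(estimator, X, y_true, **kwargs):
        # mimics the new _BaseScorer.__call__ signature from PATCH 01
        return kwargs.get("sample_weight")

    print(scorer(None, [[1]], [1], sample_weight=[2.0]))  # -> [2.0]
    # scorer(None, [[1]], [1], [2.0]) would instead raise:
    # TypeError: scorer() takes 3 positional arguments but 4 were given
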
""" return MetadataRouter(owner=self.__class__.__name__).add( estimator=self._estimator, method_mapping="score" diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 02ca8352d477b..749cedc25bff5 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -681,7 +681,6 @@ def test_regression_scorer_sample_weight(): # skip classification scorers continue try: - scorer = scorer.set_score_request(sample_weight=True) weighted = scorer(reg, X_test, y_test, sample_weight=sample_weight) ignored = scorer(reg, X_test[11:], y_test[11:]) unweighted = scorer(reg, X_test, y_test) @@ -1163,7 +1162,7 @@ def test_scorer_metadata_request(name, scorer): ) # make sure putting the scorer in a router doesn't request anything - router = MetadataRouter(owner="test").add(scorer=scorer, method_mapping="score") + router = MetadataRouter(owner="test").add(method_mapping="score", scorer=scorer) with pytest.raises(TypeError, match="got unexpected argument"): router.validate_metadata(params={"sample_weight": 1}, method="score") routed_params = router.route_params(params={"sample_weight": 1}, caller="score") diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 5719750ee5d58..9f6527f6809bf 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1112,8 +1112,9 @@ def get_metadata_routing(self): Returns ------- - routing : dict - A dict representing a MetadataRouter. + routing : MetadataRequest + A :class:`~utils.metadata_routing.MetadataRequest` encapsulating + routing information. """ return self._get_metadata_request() From a663b869f7455ead8bc144dbeb3e5d9d10665573 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 15:15:47 +0100 Subject: [PATCH 07/16] set_score_request -> with_score_request on scorers --- doc/metadata_routing.rst | 21 ++++++++++++--------- sklearn/metrics/_scorer.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 0e20f73966bf7..0ffb21c2fc9bd 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -12,9 +12,10 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass metadata to a method such as ``fit`` or ``score``, the object accepting the metadata, must *request* it. For estimators and splitters this is done via ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this -is done via ``set_score_request`` method. For grouped splitters such as -``GroupKFold`` a ``groups`` parameter is requested by default. This is best -demonstrated by the following examples. +is done via ``with_score_request`` method which creates a new scorer with the +requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups`` +parameter is requested by default. This is best demonstrated by the following +examples. If you are developing a scikit-learn compatible estimator or meta-estimator, you can check our related developer guide: @@ -47,11 +48,11 @@ Weighted scoring and fitting Here ``GroupKFold`` requests ``groups`` by default. However, we need to explicitly request weights for our scorer and for ``LogisticRegressionCV``. 
-Note that ``set_score_request`` on a scorer does not mutate the instance and +Note that ``with_score_request`` on a scorer does not mutate the instance and instead returns a new instance with the metadata request attached to it. Both of these *consumers* know how to use metadata called ``"sample_weight"``:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -83,7 +84,7 @@ configure :class:`~linear_model.LogisticRegressionCV` to not request sample weights, so that :func:`~model_selection.cross_validate` does not pass the weights along:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -113,7 +114,7 @@ Unweighted feature selection Unlike ``LogisticRegressionCV``, ``SelectKBest`` doesn't accept weights and therefore `"sample_weight"` is not routed to it:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -138,7 +139,7 @@ Despite ``make_scorer`` and ``LogisticRegressionCV`` both expecting the key consumers. In this example, we pass ``scoring_weight`` to the scorer, and ``fitting_weight`` to ``LogisticRegressionCV``:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight="scoring_weight" ... ) >>> lr = LogisticRegressionCV( @@ -190,7 +191,9 @@ supports ``sample_weight`` in ``fit`` and ``score``, it exposes ``my_weights`` is done at the router level, and not by the object, e.g. estimator, itself. -For the scorers, this is done the same way, using ``set_score_request`` method. +For the scorers, this is done using ``with_score_request`` method which returns +a new instance of the scorer with the requested metadata and does not change +the original scorer. If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata request for all objects which potentially can accept ``sample_weight`` should diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 290ec24cf2141..34f8562b6b301 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -247,8 +247,8 @@ def _factory_args(self): """Return non-default make_scorer arguments for repr.""" return "" - def set_score_request(self, **kwargs): - """Set requested parameters by the scorer. + def with_score_request(self, **kwargs): + """Create a scorer that requests metadata parameters. Note that this method returns a new instance of the scorer, and does **not** change the original scorer object. 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 749cedc25bff5..743701cc5fb85 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -630,7 +630,7 @@ def test_classification_scorer_sample_weight(): else: target = y_test try: - scorer = scorer.set_score_request(sample_weight=True) + scorer = scorer.with_score_request(sample_weight=True) weighted = scorer( estimator[name], X_test, target, sample_weight=sample_weight ) @@ -1148,11 +1148,11 @@ def test_scorer_no_op_multiclass_select_proba(): @pytest.mark.parametrize("name, scorer", SCORERS.items()) def test_scorer_metadata_request(name, scorer): - assert hasattr(scorer, "set_score_request") + assert hasattr(scorer, "with_score_request") assert hasattr(scorer, "get_metadata_routing") - weighted_scorer = scorer.set_score_request(sample_weight=True) - # set_score_request shouldn't mutate the instance + weighted_scorer = scorer.with_score_request(sample_weight=True) + # with_score_request shouldn't mutate the instance assert weighted_scorer is not scorer assert str(scorer.get_metadata_routing()) == "{}" From e838a714d4b262932d804058e640464803c9b702 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 18:38:05 +0100 Subject: [PATCH 08/16] warn on overlapping kwargs and metadata --- sklearn/metrics/_scorer.py | 53 +++++++++++++++++++-- sklearn/metrics/tests/test_score_objects.py | 19 ++++++++ 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 34f8562b6b301..58be34268750b 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -19,6 +19,7 @@ # License: Simplified BSD import copy +import warnings from collections.abc import Iterable from functools import partial from collections import Counter @@ -247,6 +248,19 @@ def _factory_args(self): """Return non-default make_scorer arguments for repr.""" return "" + def _warn_overlap(self, message, kwargs): + """Warn if there is any overlap between ``self._kwargs`` and kwargs. + + This method is intended to be used to check for overlap between + ``self._kwargs`` and ``kwargs`` passed as metadata. + """ + _kwargs = set() if self._kwargs is None else set(self._kwargs.keys()) + overlap = _kwargs.intersection(kwargs.keys()) + if overlap: + warnings.warn( + f"{message} Overlapping parameters are: {overlap}", UserWarning + ) + def with_score_request(self, **kwargs): """Create a scorer that requests metadata parameters. @@ -258,9 +272,19 @@ def with_score_request(self, **kwargs): Parameters ---------- kwargs : dict - Arguments should be of the form param_name={True, False, None, str}. - The value can also be of the form RequestType + Arguments should be of the form ``param_name=alias``, and `alias` + can be either one of ``{True, False, None, str}`` or an instance of + RequestType. """ + self._warn_overlap( + message=( + "You are setting metadata request for parameters which are " + "already set as kwargs for this metric. These set values will be " + "overridden by passed metadata if provided. Please pass them either " + "as metadata or kwargs to `make_scorer`." + ), + kwargs=kwargs, + ) res = copy.deepcopy(self) res._metadata_request = MetadataRequest(owner=self.__class__.__name__) for param, alias in kwargs.items(): @@ -298,7 +322,14 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): score : float Score function applied to prediction of estimator on X. 
""" - + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_pred = method_caller(estimator, "predict", X) scoring_kwargs = copy.deepcopy(self._kwargs) scoring_kwargs.update(kwargs) @@ -336,6 +367,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): score : float Score function applied to prediction of estimator on X. """ + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_type = type_of_target(y) y_pred = method_caller(clf, "predict_proba", X) @@ -392,6 +431,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): score : float Score function applied to prediction of estimator on X. """ + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_type = type_of_target(y) if y_type not in ("binary", "multilabel-indicator"): diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 743701cc5fb85..2962fff8537db 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1175,3 +1175,22 @@ def test_scorer_metadata_request(name, scorer): router.validate_metadata(params={"sample_weight": 1}, method="score") routed_params = router.route_params(params={"sample_weight": 1}, caller="score") assert list(routed_params.scorer.score.keys()) == ["sample_weight"] + + +def test_metadata_kwarg_conflict(): + X, y = make_classification( + n_classes=3, n_informative=3, n_samples=20, random_state=0 + ) + lr = LogisticRegression().fit(X, y) + + scorer = make_scorer( + roc_auc_score, + needs_proba=True, + multi_class="ovo", + labels=lr.classes_, + ) + with pytest.warns(UserWarning, match="already set as kwargs"): + scorer = scorer.with_score_request(labels=True) + + with pytest.warns(UserWarning, match="There is an overlap"): + scorer(lr, X, y, labels=lr.classes_) From 0c6e2c454df2c2a58c904c8e9dee42f8e38ea3e8 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 18:45:48 +0100 Subject: [PATCH 09/16] add references to docs --- sklearn/metrics/_scorer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 58be34268750b..4b1f3a093b68a 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -234,6 +234,7 @@ def __call__(self, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 @@ -267,6 +268,9 @@ def with_score_request(self, **kwargs): Note that this method returns a new instance of the scorer, and does **not** change the original scorer object. + Please see :ref:`User Guide ` on how the routing + mechanism works. + .. versionadded:: 1.1 Parameters @@ -314,6 +318,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. 
versionadded:: 1.1 @@ -359,6 +364,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 @@ -423,6 +429,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 From 0c61c05d3c66961d49b94129ccefc60afa955fb7 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 19:08:42 +0100 Subject: [PATCH 10/16] add a note on custom scorers --- doc/modules/model_evaluation.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 5b3604d403f3b..4a194adf1fa7d 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -224,6 +224,13 @@ the following two rules: Again, by convention higher numbers are better, so if your scorer returns loss, that value should be negated. +- If it requires extra metadata to be passed to it, it should expose a + ``get_metadata_routing`` method returning the requested metadata. The user + should be able to set the requested metadata via a ``with_score_request`` + method returning a copy of the scorer with the appropriate requests set. + Please see :ref:`User Guide ` for more details. + + .. note:: **Using custom scorers in functions where n_jobs > 1** While defining the custom scoring function alongside the calling function @@ -563,8 +570,8 @@ or *informedness*. Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, and Case Studies `_, 2015. - .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description - and evaluation of a scalable learning classifier + .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description + and evaluation of a scalable learning classifier system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89. .. _cohen_kappa: From ba36307d611d790876a7320dfe95f6441ed31ce1 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 22 Mar 2022 16:15:11 +0100 Subject: [PATCH 11/16] Revert "set_score_request -> with_score_request on scorers" This reverts commit a663b869f7455ead8bc144dbeb3e5d9d10665573. --- doc/metadata_routing.rst | 21 +++++++++------------ sklearn/metrics/_scorer.py | 6 ++---- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 0ffb21c2fc9bd..0e20f73966bf7 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -12,10 +12,9 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass metadata to a method such as ``fit`` or ``score``, the object accepting the metadata, must *request* it. For estimators and splitters this is done via ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this -is done via ``with_score_request`` method which creates a new scorer with the -requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups`` -parameter is requested by default. This is best demonstrated by the following -examples. +is done via ``set_score_request`` method. For grouped splitters such as +``GroupKFold`` a ``groups`` parameter is requested by default. This is best +demonstrated by the following examples. 
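
The overlap warning added in PATCH 08 can be triggered exactly as in
``test_metadata_kwarg_conflict``. A sketch assuming a build with patches 01-10
applied (at this point in the series the method is still called
``with_score_request``)::

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import make_scorer, roc_auc_score

    X, y = make_classification(
        n_classes=3, n_informative=3, n_samples=20, random_state=0
    )
    lr = LogisticRegression().fit(X, y)
    scorer = make_scorer(
        roc_auc_score, needs_proba=True, multi_class="ovo", labels=lr.classes_
    )
    # 'labels' is already bound as a make_scorer kwarg, so requesting it as
    # metadata emits the "already set as kwargs" UserWarning:
    scorer = scorer.with_score_request(labels=True)
    # and passing it again at call time emits the "There is an overlap" one:
    scorer(lr, X, y, labels=lr.classes_)
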
From ba36307d611d790876a7320dfe95f6441ed31ce1 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Tue, 22 Mar 2022 16:15:11 +0100
Subject: [PATCH 11/16] Revert "set_score_request -> with_score_request on
 scorers"

This reverts commit a663b869f7455ead8bc144dbeb3e5d9d10665573.

---
 doc/metadata_routing.rst                    | 21 +++++++++------------
 sklearn/metrics/_scorer.py                  |  6 ++----
 sklearn/metrics/tests/test_score_objects.py |  8 ++++----
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst
index 0ffb21c2fc9bd..0e20f73966bf7 100644
--- a/doc/metadata_routing.rst
+++ b/doc/metadata_routing.rst
@@ -12,10 +12,9 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass
 metadata to a method such as ``fit`` or ``score``, the object accepting the
 metadata, must *request* it. For estimators and splitters this is done via
 ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this
-is done via ``with_score_request`` method which creates a new scorer with the
-requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups``
-parameter is requested by default. This is best demonstrated by the following
-examples.
+is done via ``set_score_request`` method. For grouped splitters such as
+``GroupKFold`` a ``groups`` parameter is requested by default. This is best
+demonstrated by the following examples.
 
 If you are developing a scikit-learn compatible estimator or meta-estimator,
 you can check our related developer guide:
@@ -48,11 +47,11 @@ Weighted scoring and fitting
 
 Here ``GroupKFold`` requests ``groups`` by default. However, we need to
 explicitly request weights for our scorer and for ``LogisticRegressionCV``.
-Note that ``with_score_request`` on a scorer does not mutate the instance and
+Note that ``set_score_request`` on a scorer does not mutate the instance and
 instead returns a new instance with the metadata request attached to it. Both
 of these *consumers* know how to use metadata called ``"sample_weight"``::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
     ... )
     >>> lr = LogisticRegressionCV(
@@ -84,7 +83,7 @@ configure :class:`~linear_model.LogisticRegressionCV` to not request sample
 weights, so that :func:`~model_selection.cross_validate` does not pass the
 weights along::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
     ... )
     >>> lr = LogisticRegressionCV(
@@ -114,7 +113,7 @@ Unweighted feature selection
 Unlike ``LogisticRegressionCV``, ``SelectKBest`` doesn't accept weights and
 therefore `"sample_weight"` is not routed to it::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
    ...     sample_weight=True
     ... )
     >>> lr = LogisticRegressionCV(
@@ -139,7 +138,7 @@ Despite ``make_scorer`` and ``LogisticRegressionCV`` both expecting the key
 consumers. In this example, we pass ``scoring_weight`` to the scorer, and
 ``fitting_weight`` to ``LogisticRegressionCV``::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight="scoring_weight"
     ... )
     >>> lr = LogisticRegressionCV(
@@ -191,9 +190,7 @@ supports ``sample_weight`` in ``fit`` and ``score``, it exposes ``my_weights``
 is done at the router level, and not by the object, e.g. estimator,
 itself.
 
-For the scorers, this is done using ``with_score_request`` method which returns
-a new instance of the scorer with the requested metadata and does not change
-the original scorer.
+For the scorers, this is done the same way, using ``set_score_request`` method.
 
 If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata
 request for all objects which potentially can accept ``sample_weight`` should
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 58f5f72c145dc..e6be7d34673ca 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -25,8 +25,6 @@
 from collections import Counter
 
 import numpy as np
-import copy
-import warnings
 
 from . import (
     r2_score,
@@ -264,8 +262,8 @@ def _warn_overlap(self, message, kwargs):
                 f"{message} Overlapping parameters are: {overlap}", UserWarning
             )
 
-    def with_score_request(self, **kwargs):
-        """Create a scorer that requests metadata parameters.
+    def set_score_request(self, **kwargs):
+        """Set requested parameters by the scorer.
 
         Note that this method returns a new instance of the scorer, and does
         **not** change the original scorer object.
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index bec4e24c4d751..b9c18f4c7782d 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -631,7 +631,7 @@ def test_classification_scorer_sample_weight():
         else:
             target = y_test
         try:
-            scorer = scorer.with_score_request(sample_weight=True)
+            scorer = scorer.set_score_request(sample_weight=True)
             weighted = scorer(
                 estimator[name], X_test, target, sample_weight=sample_weight
             )
@@ -1161,11 +1161,11 @@ def test_scorer_no_op_multiclass_select_proba():
 
 @pytest.mark.parametrize("name, scorer", SCORERS.items())
 def test_scorer_metadata_request(name, scorer):
-    assert hasattr(scorer, "with_score_request")
+    assert hasattr(scorer, "set_score_request")
     assert hasattr(scorer, "get_metadata_routing")
 
-    weighted_scorer = scorer.with_score_request(sample_weight=True)
-    # with_score_request shouldn't mutate the instance
+    weighted_scorer = scorer.set_score_request(sample_weight=True)
+    # set_score_request shouldn't mutate the instance
     assert weighted_scorer is not scorer
 
     assert str(scorer.get_metadata_routing()) == "{}"

From 2bea2036dfa8d2688046731f8474fb200f1ff629 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 10:14:32 +0100
Subject: [PATCH 12/16] set_score_request now mutates the instance

---
 doc/metadata_routing.rst                    |  4 +---
 doc/modules/model_evaluation.rst            |  5 ++---
 sklearn/metrics/_scorer.py                  | 18 +++++++-----------
 sklearn/metrics/tests/test_score_objects.py | 18 +++++++++++-------
 4 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst
index 0e20f73966bf7..de04ecf022415 100644
--- a/doc/metadata_routing.rst
+++ b/doc/metadata_routing.rst
@@ -47,9 +47,7 @@ Weighted scoring and fitting
 
 Here ``GroupKFold`` requests ``groups`` by default. However, we need to
 explicitly request weights for our scorer and for ``LogisticRegressionCV``.
-Note that ``set_score_request`` on a scorer does not mutate the instance and
-instead returns a new instance with the metadata request attached to it. Both
-of these *consumers* know how to use metadata called ``"sample_weight"``::
+Both of these *consumers* know how to use metadata called ``"sample_weight"``::
 
     >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index ea85973d5ee56..d6d21b3814918 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -227,9 +227,8 @@ the following two rules:
 
 - If it requires extra metadata to be passed to it, it should expose a
   ``get_metadata_routing`` method returning the requested metadata. The user
-  should be able to set the requested metadata via a ``with_score_request``
-  method returning a copy of the scorer with the appropriate requests set.
-  Please see :ref:`User Guide <metadata_routing>` for more details.
+  should be able to set the requested metadata via a ``set_score_request``
+  method. Please see :ref:`User Guide <metadata_routing>` for more details.
 
 
 .. note:: **Using custom scorers in functions where n_jobs > 1**
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index e6be7d34673ca..72a7e2a807c8f 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -234,7 +234,7 @@ def __call__(self, estimator, X, y_true, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
@@ -265,9 +265,6 @@ def _warn_overlap(self, message, kwargs):
     def set_score_request(self, **kwargs):
         """Set requested parameters by the scorer.
 
-        Note that this method returns a new instance of the scorer, and does
-        **not** change the original scorer object.
-
         Please see :ref:`User Guide <metadata_routing>` on how the routing
         mechanism works.
 
@@ -289,11 +286,10 @@ def set_score_request(self, **kwargs):
             ),
             kwargs=kwargs,
         )
-        res = copy.deepcopy(self)
-        res._metadata_request = MetadataRequest(owner=self.__class__.__name__)
+        self._metadata_request = MetadataRequest(owner=self.__class__.__name__)
         for param, alias in kwargs.items():
-            res._metadata_request.score.add_request(param=param, alias=alias)
-        return res
+            self._metadata_request.score.add_request(param=param, alias=alias)
+        return self
 
 
 class _PredictScorer(_BaseScorer):
@@ -318,7 +314,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
@@ -364,7 +360,7 @@ def _score(self, method_caller, clf, X, y, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
@@ -425,7 +425,7 @@ def _score(self, method_caller, clf, X, y, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index b9c18f4c7782d..85bf4f050f588 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -1159,23 +1159,27 @@ def test_scorer_no_op_multiclass_select_proba():
         scorer(lr, X_test, y_test)
 
 
-@pytest.mark.parametrize("name, scorer", SCORERS.items())
-def test_scorer_metadata_request(name, scorer):
+@pytest.mark.parametrize("name", get_scorer_names(), ids=get_scorer_names())
+def test_scorer_metadata_request(name):
+    scorer = get_scorer(name)
     assert hasattr(scorer, "set_score_request")
     assert hasattr(scorer, "get_metadata_routing")
 
+    assert str(scorer.get_metadata_routing()) == "{}"
+
     weighted_scorer = scorer.set_score_request(sample_weight=True)
-    # set_score_request shouldn't mutate the instance
-    assert weighted_scorer is not scorer
+    # set_score_request should mutate the instance
+    assert weighted_scorer is scorer
 
-    assert str(scorer.get_metadata_routing()) == "{}"
     assert (
         str(weighted_scorer.get_metadata_routing())
         == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
     )
 
     # make sure putting the scorer in a router doesn't request anything
-    router = MetadataRouter(owner="test").add(method_mapping="score", scorer=scorer)
+    router = MetadataRouter(owner="test").add(
+        method_mapping="score", scorer=get_scorer(name)
+    )
     with pytest.raises(TypeError, match="got unexpected argument"):
         router.validate_metadata(params={"sample_weight": 1}, method="score")
     routed_params = router.route_params(params={"sample_weight": 1}, caller="score")
@@ -1203,7 +1207,7 @@ def test_metadata_kwarg_conflict():
         labels=lr.classes_,
     )
     with pytest.warns(UserWarning, match="already set as kwargs"):
-        scorer = scorer.with_score_request(labels=True)
+        scorer.set_score_request(labels=True)
 
     with pytest.warns(UserWarning, match="There is an overlap"):
         scorer(lr, X, y, labels=lr.classes_)
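
After PATCH 12 the request-setting call mutates the scorer rather than
copying it, which the updated test pins down. A two-line sketch of the new
contract (assuming a build with patches up to 12 applied)::

    from sklearn.metrics import accuracy_score, make_scorer

    acc = make_scorer(accuracy_score)
    weighted = acc.set_score_request(sample_weight=True)
    assert weighted is acc  # in-place now, unlike the earlier copy-returning variant
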
From dba266f3222c17de3307a158d2e145d10ff2e5dd Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 10:17:40 +0100
Subject: [PATCH 13/16] don't test repr

---
 sklearn/metrics/tests/test_score_objects.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 85bf4f050f588..90d2820d339e9 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -15,7 +15,8 @@
 from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import ignore_warnings
-from sklearn.utils.metadata_routing import MetadataRouter
+from sklearn.utils.metadata_routing import MetadataRouter, RequestType
+from sklearn.tests.test_metadata_routing import assert_request_is_empty
 
 from sklearn.base import BaseEstimator
 from sklearn.metrics import (
@@ -1165,15 +1166,16 @@ def test_scorer_metadata_request(name):
     assert hasattr(scorer, "set_score_request")
     assert hasattr(scorer, "get_metadata_routing")
 
-    assert str(scorer.get_metadata_routing()) == "{}"
+    assert_request_is_empty(scorer.get_metadata_routing())
 
     weighted_scorer = scorer.set_score_request(sample_weight=True)
     # set_score_request should mutate the instance
     assert weighted_scorer is scorer
 
+    assert_request_is_empty(weighted_scorer.get_metadata_routing(), exclude="score")
     assert (
-        str(weighted_scorer.get_metadata_routing())
-        == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
+        weighted_scorer.get_metadata_routing().score.requests["sample_weight"]
+        == RequestType.REQUESTED
     )

From 203b4e4efa410be4597db2772f3be37a50d6d128 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 11:23:26 +0100
Subject: [PATCH 14/16] fix and test _passthrough_scorer

---
 sklearn/metrics/_scorer.py                  | 10 +++++++---
 sklearn/metrics/tests/test_score_objects.py | 13 +++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 72a7e2a807c8f..09a70cf7a393c 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -67,6 +67,7 @@
 from ..utils.metadata_routing import MetadataRequest
 from ..utils.metadata_routing import MetadataRouter
 from ..utils.metadata_routing import process_routing
+from ..utils.metadata_routing import get_routing_for_object
 
 
 def _cached_call(cache, estimator, method, *args, **kwargs):
@@ -541,9 +542,12 @@ def get_metadata_routing(self):
             A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
             routing information.
         """
-        return MetadataRouter(owner=self.__class__.__name__).add(
-            estimator=self._estimator, method_mapping="score"
-        )
+        # This scorer doesn't do any validation or routing, it only exposes the
+        # score requests to the parent object. This object behaves as a
+        # consumer rather than a router.
+        res = MetadataRequest(owner=self._estimator.__class__.__name__)
+        res.score = get_routing_for_object(self._estimator).score
+        return res
 
 
 def check_scoring(estimator, scoring=None, *, allow_none=False):
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 90d2820d339e9..5aafb5c6b610d 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -1213,3 +1213,16 @@ def test_metadata_kwarg_conflict():
 
     with pytest.warns(UserWarning, match="There is an overlap"):
         scorer(lr, X, y, labels=lr.classes_)
+
+
+def test_passthrough_scorer_metadata_request():
+    scorer = _passthrough_scorer(
+        estimator=LinearSVC()
+        .set_score_request(sample_weight="alias")
+        .set_fit_request(sample_weight=True)
+    )
+    # test that _passthrough_scorer leaves everything other than `score` empty
+    assert_request_is_empty(scorer.get_metadata_routing(), exclude="score")
+    # test that _passthrough_scorer doesn't behave like a router and leaves
+    # the request as is.
+    assert scorer.get_metadata_routing().score.requests["sample_weight"] == "alias"

From 4e32f318900ab43afb2fe28559e53be786ba21e9 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 14:20:32 +0100
Subject: [PATCH 15/16] Joel's comments

---
 doc/modules/model_evaluation.rst            |  4 ++--
 sklearn/metrics/_scorer.py                  |  4 ++--
 sklearn/metrics/tests/test_score_objects.py | 12 ++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index d6d21b3814918..1e97e3b6cab6f 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -225,8 +225,8 @@ the following two rules:
   Again, by convention higher numbers are better, so if your scorer
   returns loss, that value should be negated.
 
-- If it requires extra metadata to be passed to it, it should expose a
-  ``get_metadata_routing`` method returning the requested metadata. The user
+- Advanced: If it requires extra metadata to be passed to it, it should expose
+  a ``get_metadata_routing`` method returning the requested metadata. The user
   should be able to set the requested metadata via a ``set_score_request``
   method. Please see :ref:`User Guide <metadata_routing>` for more details.
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 09a70cf7a393c..ffc12add29095 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -523,7 +523,7 @@ def get_scorer(scoring):
     return scorer
 
 
-class _passthrough_scorer:
+class _PassthroughScorer:
     def __init__(self, estimator):
         self._estimator = estimator
 
@@ -602,7 +602,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
         return get_scorer(scoring)
     elif scoring is None:
         if hasattr(estimator, "score"):
-            return _passthrough_scorer(estimator)
+            return _PassthroughScorer(estimator)
         elif allow_none:
             return None
         else:
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 5aafb5c6b610d..187ffdb7368e3 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -39,7 +39,7 @@
 from sklearn.metrics import check_scoring
 from sklearn.metrics._scorer import (
     _PredictScorer,
-    _passthrough_scorer,
+    _PassthroughScorer,
     _MultimetricScorer,
     _check_multimetric_scoring,
 )
@@ -238,7 +238,7 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator):
     estimator = EstimatorWithFitAndScore()
     estimator.fit([[1]], [1])
     scorer = scoring_validator(estimator)
-    assert isinstance(scorer, _passthrough_scorer)
+    assert isinstance(scorer, _PassthroughScorer)
     assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)
 
     estimator = EstimatorWithFitAndPredict()
@@ -1215,14 +1215,14 @@ def test_metadata_kwarg_conflict():
         scorer(lr, X, y, labels=lr.classes_)
 
 
-def test_passthrough_scorer_metadata_request():
-    scorer = _passthrough_scorer(
+def test_PassthroughScorer_metadata_request():
+    scorer = _PassthroughScorer(
         estimator=LinearSVC()
         .set_score_request(sample_weight="alias")
         .set_fit_request(sample_weight=True)
     )
-    # test that _passthrough_scorer leaves everything other than `score` empty
+    # test that _PassthroughScorer leaves everything other than `score` empty
     assert_request_is_empty(scorer.get_metadata_routing(), exclude="score")
-    # test that _passthrough_scorer doesn't behave like a router and leaves
+    # test that _PassthroughScorer doesn't behave like a router and leaves
     # the request as is.
     assert scorer.get_metadata_routing().score.requests["sample_weight"] == "alias"

From 44f91cecd1c509a8b77bb30766dfd1332d55f2a8 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 24 Mar 2022 10:30:30 +0100
Subject: [PATCH 16/16] Thomas's comments

---
 sklearn/metrics/_scorer.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index ffc12add29095..bf345ff6a0344 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -333,8 +333,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs):
             kwargs=kwargs,
         )
         y_pred = method_caller(estimator, "predict", X)
-        scoring_kwargs = copy.deepcopy(self._kwargs)
-        scoring_kwargs.update(kwargs)
+        scoring_kwargs = {**self._kwargs, **kwargs}
         return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
 
 
@@ -387,11 +386,10 @@ def _score(self, method_caller, clf, X, y, **kwargs):
             # Thus, we need to check for the shape of `y_pred`.
             y_pred = self._select_proba_binary(y_pred, clf.classes_)
 
-        scoring_kwargs = copy.deepcopy(self._kwargs)
-        scoring_kwargs.update(kwargs)
+        scoring_kwargs = {**self._kwargs, **kwargs}
         # this is for backward compatibility to avoid passing sample_weight
         # to the scorer if it's None
-        # probably remove in v1.3
+        # TODO(1.3) Probably remove
         if scoring_kwargs.get("sample_weight", -1) is None:
             del scoring_kwargs["sample_weight"]
 
@@ -471,12 +471,11 @@ def _score(self, method_caller, clf, X, y, **kwargs):
         elif isinstance(y_pred, list):
             y_pred = np.vstack([p[:, -1] for p in y_pred]).T
 
-        scoring_kwargs = copy.deepcopy(self._kwargs)
-        scoring_kwargs.update(kwargs)
+        scoring_kwargs = {**self._kwargs, **kwargs}
         # this is for backward compatibility to avoid passing sample_weight
         # to the scorer if it's None
-        # probably remove in v1.3
+        # TODO(1.3) Probably remove
         if scoring_kwargs.get("sample_weight", -1) is None:
             del scoring_kwargs["sample_weight"]
         return self._sign * self._score_func(y, y_pred, **scoring_kwargs)
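
The merging idiom adopted in PATCH 16 is plain Python and can be
sanity-checked in isolation. In this standalone sketch,
``merge_scoring_kwargs`` is a hypothetical stand-in for the inlined logic; it
shows that call-time metadata overrides kwargs bound at ``make_scorer`` time
and that an explicit ``sample_weight=None`` is dropped for backward
compatibility::

    def merge_scoring_kwargs(bound_kwargs, call_kwargs):
        scoring_kwargs = {**bound_kwargs, **call_kwargs}  # call-time wins on overlap
        # backward compatibility: a sample_weight explicitly passed as None is
        # dropped rather than forwarded to the metric
        if scoring_kwargs.get("sample_weight", -1) is None:
            del scoring_kwargs["sample_weight"]
        return scoring_kwargs

    print(merge_scoring_kwargs({"beta": 2}, {"sample_weight": None}))   # {'beta': 2}
    print(merge_scoring_kwargs({"beta": 2}, {"sample_weight": [1.0]}))  # both keys kept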