Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/metadata_routing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Weighted scoring and fitting
----------------------------

Here ``GroupKFold`` requests ``groups`` by default. However, we need to
explicitly request weights in ``make_scorer`` and for ``LogisticRegressionCV``.
explicitly request weights for our scorer and for ``LogisticRegressionCV``.
Both of these *consumers* know how to use metadata called ``"sample_weight"``::

>>> weighted_acc = make_scorer(accuracy_score).set_score_request(
Expand Down
6 changes: 6 additions & 0 deletions doc/modules/model_evaluation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,12 @@ the following two rules:
Again, by convention higher numbers are better, so if your scorer
returns loss, that value should be negated.

- Advanced: If it requires extra metadata to be passed to it, it should expose
a ``get_metadata_routing`` method returning the requested metadata. The user
should be able to set the requested metadata via a ``set_score_request``
method. Please see :ref:`User Guide <metadata_routing>` for more details.


.. note:: **Using custom scorers in functions where n_jobs > 1**

While defining the custom scoring function alongside the calling function
Expand Down
2 changes: 1 addition & 1 deletion sklearn/inspection/_permutation_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

def _weights_scorer(scorer, estimator, X, y, sample_weight):
if sample_weight is not None:
return scorer(estimator, X, y, sample_weight)
return scorer(estimator, X, y, sample_weight=sample_weight)
return scorer(estimator, X, y)


Expand Down
214 changes: 166 additions & 48 deletions sklearn/metrics/_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
# Arnaud Joly <arnaud.v.joly@gmail.com>
# License: Simplified BSD

import copy
import warnings
from collections.abc import Iterable
from functools import partial
from collections import Counter

import numpy as np
import copy
import warnings

from . import (
r2_score,
Expand Down Expand Up @@ -63,6 +63,11 @@

from ..utils.multiclass import type_of_target
from ..base import is_regressor
from ..utils.metadata_routing import _MetadataRequester
from ..utils.metadata_routing import MetadataRequest
from ..utils.metadata_routing import MetadataRouter
from ..utils.metadata_routing import process_routing
from ..utils.metadata_routing import get_routing_for_object


def _cached_call(cache, estimator, method, *args, **kwargs):
Expand Down Expand Up @@ -101,11 +106,15 @@ def __call__(self, estimator, *args, **kwargs):
cache = {} if self._use_cache(estimator) else None
cached_call = partial(_cached_call, cache)

params = process_routing(self, "score", kwargs)

for name, scorer in self._scorers.items():
if isinstance(scorer, _BaseScorer):
score = scorer._score(cached_call, estimator, *args, **kwargs)
score = scorer._score(
cached_call, estimator, *args, **params.get(name).score
)
else:
score = scorer(estimator, *args, **kwargs)
score = scorer(estimator, *args, **params.get(name).score)
scores[name] = score
return scores

Expand Down Expand Up @@ -140,8 +149,24 @@ def _use_cache(self, estimator):
return True
return False

def get_metadata_routing(self):
    """Get metadata routing of this object.

    Please check :ref:`User Guide <metadata_routing>` on how the routing
    mechanism works.

    Returns
    -------
    routing : MetadataRouter
        A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
        routing information.
    """
    # Each sub-scorer is registered as a consumer under its own name; the
    # "score" method mapping routes metadata passed to __call__ onward.
    return MetadataRouter(owner=self.__class__.__name__).add(
        **self._scorers, method_mapping="score"
    )


class _BaseScorer(_MetadataRequester):
def __init__(self, score_func, sign, kwargs):
self._kwargs = kwargs
self._score_func = score_func
Expand Down Expand Up @@ -193,7 +218,7 @@ def __repr__(self):
kwargs_string,
)

def __call__(self, estimator, X, y_true, sample_weight=None):
def __call__(self, estimator, X, y_true, **kwargs):
"""Evaluate predicted target values for X relative to y_true.

Parameters
Expand All @@ -208,29 +233,68 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
y_true : array-like
Gold standard target values for X.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""
return self._score(
partial(_cached_call, None),
estimator,
X,
y_true,
sample_weight=sample_weight,
)
return self._score(partial(_cached_call, None), estimator, X, y_true, **kwargs)

def _factory_args(self):
"""Return non-default make_scorer arguments for repr."""
return ""

def _warn_overlap(self, message, kwargs):
    """Emit a ``UserWarning`` when metadata names collide with stored kwargs.

    Compares the keys of ``kwargs`` (passed as metadata) against the keys
    stored in ``self._kwargs`` at construction time and warns, prefixed by
    ``message``, if any name appears in both.
    """
    stored = set(self._kwargs) if self._kwargs is not None else set()
    shared = stored.intersection(kwargs)
    if shared:
        warnings.warn(
            f"{message} Overlapping parameters are: {shared}", UserWarning
        )

def set_score_request(self, **kwargs):
    """Set requested parameters by the scorer.

    Please see :ref:`User Guide <metadata_routing>` on how the routing
    mechanism works.

    .. versionadded:: 1.1

    Parameters
    ----------
    kwargs : dict
        Arguments should be of the form ``param_name=alias``, and `alias`
        can be either one of ``{True, False, None, str}`` or an instance of
        RequestType.
    """
    # Requesting a parameter that was already fixed via `make_scorer`
    # kwargs is ambiguous; warn so the user picks one mechanism.
    self._warn_overlap(
        message=(
            "You are setting metadata request for parameters which are "
            "already set as kwargs for this metric. These set values will be "
            "overridden by passed metadata if provided. Please pass them either "
            "as metadata or kwargs to `make_scorer`."
        ),
        kwargs=kwargs,
    )
    # Rebuild the request object from scratch on every call.
    request = MetadataRequest(owner=self.__class__.__name__)
    for name, alias in kwargs.items():
        request.score.add_request(param=name, alias=alias)
    self._metadata_request = request
    return self


class _PredictScorer(_BaseScorer):
def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
def _score(self, method_caller, estimator, X, y_true, **kwargs):
"""Evaluate predicted target values for X relative to y_true.

Parameters
Expand All @@ -249,26 +313,32 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
y_true : array-like
Gold standard target values for X.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""

self._warn_overlap(
message=(
"There is an overlap between set kwargs of this scorer instance and"
" passed metadata. Please pass them either as kwargs to `make_scorer`"
" or metadata, but not both."
),
kwargs=kwargs,
)
y_pred = method_caller(estimator, "predict", X)
if sample_weight is not None:
return self._sign * self._score_func(
y_true, y_pred, sample_weight=sample_weight, **self._kwargs
)
else:
return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
scoring_kwargs = {**self._kwargs, **kwargs}
return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)


class _ProbaScorer(_BaseScorer):
def _score(self, method_caller, clf, X, y, sample_weight=None):
def _score(self, method_caller, clf, X, y, **kwargs):
"""Evaluate predicted probabilities for X relative to y_true.

Parameters
Expand All @@ -288,14 +358,25 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
Gold standard target values for X. These must be class labels,
not probabilities.

sample_weight : array-like, default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""
self._warn_overlap(
message=(
"There is an overlap between set kwargs of this scorer instance and"
" passed metadata. Please pass them either as kwargs to `make_scorer`"
" or metadata, but not both."
),
kwargs=kwargs,
)

y_type = type_of_target(y)
y_pred = method_caller(clf, "predict_proba", X)
Expand All @@ -304,19 +385,22 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
# problem: (when only 2 class are given to `y_true` during scoring)
# Thus, we need to check for the shape of `y_pred`.
y_pred = self._select_proba_binary(y_pred, clf.classes_)
if sample_weight is not None:
return self._sign * self._score_func(
y, y_pred, sample_weight=sample_weight, **self._kwargs
)
else:
return self._sign * self._score_func(y, y_pred, **self._kwargs)

scoring_kwargs = {**self._kwargs, **kwargs}
# this is for backward compatibility to avoid passing sample_weight
# to the scorer if it's None
# TODO(1.3) Probably remove
if scoring_kwargs.get("sample_weight", -1) is None:
del scoring_kwargs["sample_weight"]

return self._sign * self._score_func(y, y_pred, **scoring_kwargs)

def _factory_args(self):
    """Return non-default make_scorer arguments for repr."""
    return ", needs_proba=True"


class _ThresholdScorer(_BaseScorer):
def _score(self, method_caller, clf, X, y, sample_weight=None):
def _score(self, method_caller, clf, X, y, **kwargs):
"""Evaluate decision function output for X relative to y_true.

Parameters
Expand All @@ -338,14 +422,25 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
Gold standard target values for X. These must be class labels,
not decision function values.

sample_weight : array-like, default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""
self._warn_overlap(
message=(
"There is an overlap between set kwargs of this scorer instance and"
" passed metadata. Please pass them either as kwargs to `make_scorer`"
" or metadata, but not both."
),
kwargs=kwargs,
)

y_type = type_of_target(y)
if y_type not in ("binary", "multilabel-indicator"):
Expand Down Expand Up @@ -376,12 +471,13 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
elif isinstance(y_pred, list):
y_pred = np.vstack([p[:, -1] for p in y_pred]).T

if sample_weight is not None:
return self._sign * self._score_func(
y, y_pred, sample_weight=sample_weight, **self._kwargs
)
else:
return self._sign * self._score_func(y, y_pred, **self._kwargs)
scoring_kwargs = {**self._kwargs, **kwargs}
# this is for backward compatibility to avoid passing sample_weight
# to the scorer if it's None
# TODO(1.3) Probably remove
if scoring_kwargs.get("sample_weight", -1) is None:
del scoring_kwargs["sample_weight"]
return self._sign * self._score_func(y, y_pred, **scoring_kwargs)

def _factory_args(self):
    """Return non-default make_scorer arguments for repr."""
    return ", needs_threshold=True"
Expand Down Expand Up @@ -424,9 +520,31 @@ def get_scorer(scoring):
return scorer


def _passthrough_scorer(estimator, *args, **kwargs):
"""Function that wraps estimator.score"""
return estimator.score(*args, **kwargs)
class _PassthroughScorer:
def __init__(self, estimator):
self._estimator = estimator

def __call__(self, estimator, *args, **kwargs):
"""Method that wraps estimator.score"""
return estimator.score(*args, **kwargs)

def get_metadata_routing(self):
"""Get requested data properties.

.. versionadded:: 1.1

Returns
-------
routing : MetadataRouter
A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
routing information.
"""
# This scorer doesn't do any validation or routing, it only exposes the
# score requests to the parent object. This object behaves as a
# consumer rather than a router.
res = MetadataRequest(owner=self._estimator.__class__.__name__)
res.score = get_routing_for_object(self._estimator).score
return res


def check_scoring(estimator, scoring=None, *, allow_none=False):
Expand Down Expand Up @@ -481,7 +599,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
return get_scorer(scoring)
elif scoring is None:
if hasattr(estimator, "score"):
return _passthrough_scorer
return _PassthroughScorer(estimator)
elif allow_none:
return None
else:
Expand Down Expand Up @@ -614,7 +732,7 @@ def make_scorer(
----------
score_func : callable
Score function (or loss function) with signature
`score_func(y, y_pred, **kwargs)`.
``score_func(y, y_pred, **kwargs)``.

greater_is_better : bool, default=True
Whether `score_func` is a score function (default), meaning high is
Expand Down
Loading