Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/metadata_routing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Weighted scoring and fitting
----------------------------

Here ``GroupKFold`` requests ``groups`` by default. However, we need to
explicitly request weights in ``make_scorer`` and for ``LogisticRegressionCV``.
explicitly request weights for our scorer and for ``LogisticRegressionCV``.
Both of these *consumers* know how to use metadata called ``"sample_weight"``::

>>> weighted_acc = make_scorer(accuracy_score).set_score_request(
Expand Down
6 changes: 6 additions & 0 deletions doc/modules/model_evaluation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,12 @@ the following two rules:
Again, by convention higher numbers are better, so if your scorer
returns loss, that value should be negated.

- Advanced: If it requires extra metadata to be passed to it, it should expose
a ``get_metadata_routing`` method returning the requested metadata. The user
should be able to set the requested metadata via a ``set_score_request``
method. Please see :ref:`User Guide <metadata_routing>` for more details.


.. note:: **Using custom scorers in functions where n_jobs > 1**

While defining the custom scoring function alongside the calling function
Expand Down
2 changes: 1 addition & 1 deletion sklearn/inspection/_permutation_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

def _weights_scorer(scorer, estimator, X, y, sample_weight):
if sample_weight is not None:
return scorer(estimator, X, y, sample_weight)
return scorer(estimator, X, y, sample_weight=sample_weight)
return scorer(estimator, X, y)


Expand Down
214 changes: 166 additions & 48 deletions sklearn/metrics/_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
# Arnaud Joly <arnaud.v.joly@gmail.com>
# License: Simplified BSD

import copy
import warnings
from collections.abc import Iterable
from functools import partial
from collections import Counter

import numpy as np
import copy
import warnings

from . import (
r2_score,
Expand Down Expand Up @@ -63,6 +63,11 @@

from ..utils.multiclass import type_of_target
from ..base import is_regressor
from ..utils.metadata_routing import _MetadataRequester
from ..utils.metadata_routing import MetadataRequest
from ..utils.metadata_routing import MetadataRouter
from ..utils.metadata_routing import process_routing
from ..utils.metadata_routing import get_routing_for_object


def _cached_call(cache, estimator, method, *args, **kwargs):
Expand Down Expand Up @@ -101,11 +106,15 @@ def __call__(self, estimator, *args, **kwargs):
cache = {} if self._use_cache(estimator) else None
cached_call = partial(_cached_call, cache)

params = process_routing(self, "score", kwargs)

for name, scorer in self._scorers.items():
if isinstance(scorer, _BaseScorer):
score = scorer._score(cached_call, estimator, *args, **kwargs)
score = scorer._score(
cached_call, estimator, *args, **params.get(name).score
)
else:
score = scorer(estimator, *args, **kwargs)
score = scorer(estimator, *args, **params.get(name).score)
scores[name] = score
return scores

Expand Down Expand Up @@ -140,8 +149,24 @@ def _use_cache(self, estimator):
return True
return False

def get_metadata_routing(self):
    """Get metadata routing of this object.

    Please check :ref:`User Guide <metadata_routing>` on how the routing
    mechanism works.

    Returns
    -------
    routing : MetadataRouter
        A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
        routing information.
    """
    # Each sub-scorer is registered as a consumer under its own name; the
    # "score" method mapping routes metadata passed to __call__ onward.
    return MetadataRouter(owner=self.__class__.__name__).add(
        **self._scorers, method_mapping="score"
    )


class _BaseScorer(_MetadataRequester):
def __init__(self, score_func, sign, kwargs):
self._kwargs = kwargs
self._score_func = score_func
Expand Down Expand Up @@ -193,7 +218,7 @@ def __repr__(self):
kwargs_string,
)

def __call__(self, estimator, X, y_true, sample_weight=None):
def __call__(self, estimator, X, y_true, **kwargs):
"""Evaluate predicted target values for X relative to y_true.

Parameters
Expand All @@ -208,29 +233,68 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
y_true : array-like
Gold standard target values for X.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""
return self._score(
partial(_cached_call, None),
estimator,
X,
y_true,
sample_weight=sample_weight,
)
return self._score(partial(_cached_call, None), estimator, X, y_true, **kwargs)

def _factory_args(self):
"""Return non-default make_scorer arguments for repr."""
return ""

def _warn_overlap(self, message, kwargs):
    """Emit a ``UserWarning`` when metadata names collide with stored kwargs.

    Compares the keys of ``kwargs`` (passed as metadata) against the keys
    stored in ``self._kwargs`` at construction time and warns, prefixed by
    ``message``, if any name appears in both.
    """
    stored = set(self._kwargs) if self._kwargs is not None else set()
    shared = stored.intersection(kwargs)
    if shared:
        warnings.warn(
            f"{message} Overlapping parameters are: {shared}", UserWarning
        )

def set_score_request(self, **kwargs):
    """Set requested parameters by the scorer.

    Please see :ref:`User Guide <metadata_routing>` on how the routing
    mechanism works.

    .. versionadded:: 1.1

    Parameters
    ----------
    kwargs : dict
        Arguments should be of the form ``param_name=alias``, and `alias`
        can be either one of ``{True, False, None, str}`` or an instance of
        RequestType.
    """
    # Requesting a parameter that was already fixed via `make_scorer`
    # kwargs is ambiguous; warn so the user picks one mechanism.
    self._warn_overlap(
        message=(
            "You are setting metadata request for parameters which are "
            "already set as kwargs for this metric. These set values will be "
            "overridden by passed metadata if provided. Please pass them either "
            "as metadata or kwargs to `make_scorer`."
        ),
        kwargs=kwargs,
    )
    # Rebuild the request object from scratch on every call.
    request = MetadataRequest(owner=self.__class__.__name__)
    for name, alias in kwargs.items():
        request.score.add_request(param=name, alias=alias)
    self._metadata_request = request
    return self


class _PredictScorer(_BaseScorer):
def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
def _score(self, method_caller, estimator, X, y_true, **kwargs):
"""Evaluate predicted target values for X relative to y_true.

Parameters
Expand All @@ -249,26 +313,32 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
y_true : array-like
Gold standard target values for X.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""

self._warn_overlap(
message=(
"There is an overlap between set kwargs of this scorer instance and"
" passed metadata. Please pass them either as kwargs to `make_scorer`"
" or metadata, but not both."
),
kwargs=kwargs,
)
y_pred = method_caller(estimator, "predict", X)
if sample_weight is not None:
return self._sign * self._score_func(
y_true, y_pred, sample_weight=sample_weight, **self._kwargs
)
else:
return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
scoring_kwargs = {**self._kwargs, **kwargs}
return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)


class _ProbaScorer(_BaseScorer):
def _score(self, method_caller, clf, X, y, sample_weight=None):
def _score(self, method_caller, clf, X, y, **kwargs):
"""Evaluate predicted probabilities for X relative to y_true.

Parameters
Expand All @@ -288,14 +358,25 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
Gold standard target values for X. These must be class labels,
not probabilities.

sample_weight : array-like, default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""
self._warn_overlap(
message=(
"There is an overlap between set kwargs of this scorer instance and"
" passed metadata. Please pass them either as kwargs to `make_scorer`"
" or metadata, but not both."
),
kwargs=kwargs,
)

y_type = type_of_target(y)
y_pred = method_caller(clf, "predict_proba", X)
Expand All @@ -304,19 +385,22 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
# problem: (when only 2 class are given to `y_true` during scoring)
# Thus, we need to check for the shape of `y_pred`.
y_pred = self._select_proba_binary(y_pred, clf.classes_)
if sample_weight is not None:
return self._sign * self._score_func(
y, y_pred, sample_weight=sample_weight, **self._kwargs
)
else:
return self._sign * self._score_func(y, y_pred, **self._kwargs)

scoring_kwargs = {**self._kwargs, **kwargs}
# this is for backward compatibility to avoid passing sample_weight
# to the scorer if it's None
# TODO(1.3) Probably remove
if scoring_kwargs.get("sample_weight", -1) is None:
del scoring_kwargs["sample_weight"]

return self._sign * self._score_func(y, y_pred, **scoring_kwargs)

def _factory_args(self):
    """Return non-default make_scorer arguments for repr."""
    return ", needs_proba=True"


class _ThresholdScorer(_BaseScorer):
def _score(self, method_caller, clf, X, y, sample_weight=None):
def _score(self, method_caller, clf, X, y, **kwargs):
"""Evaluate decision function output for X relative to y_true.

Parameters
Expand All @@ -338,14 +422,25 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
Gold standard target values for X. These must be class labels,
not decision function values.

sample_weight : array-like, default=None
Sample weights.
**kwargs : dict
Other parameters passed to the scorer, e.g. sample_weight.
Refer to :func:`set_score_request` for more details.

.. versionadded:: 1.1

Returns
-------
score : float
Score function applied to prediction of estimator on X.
"""
self._warn_overlap(
message=(
"There is an overlap between set kwargs of this scorer instance and"
" passed metadata. Please pass them either as kwargs to `make_scorer`"
" or metadata, but not both."
),
kwargs=kwargs,
)

y_type = type_of_target(y)
if y_type not in ("binary", "multilabel-indicator"):
Expand Down Expand Up @@ -376,12 +471,13 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
elif isinstance(y_pred, list):
y_pred = np.vstack([p[:, -1] for p in y_pred]).T

if sample_weight is not None:
return self._sign * self._score_func(
y, y_pred, sample_weight=sample_weight, **self._kwargs
)
else:
return self._sign * self._score_func(y, y_pred, **self._kwargs)
scoring_kwargs = {**self._kwargs, **kwargs}
# this is for backward compatibility to avoid passing sample_weight
# to the scorer if it's None
# TODO(1.3) Probably remove
if scoring_kwargs.get("sample_weight", -1) is None:
del scoring_kwargs["sample_weight"]
return self._sign * self._score_func(y, y_pred, **scoring_kwargs)

def _factory_args(self):
    """Return non-default make_scorer arguments for repr."""
    return ", needs_threshold=True"
Expand Down Expand Up @@ -424,9 +520,31 @@ def get_scorer(scoring):
return scorer


def _passthrough_scorer(estimator, *args, **kwargs):
"""Function that wraps estimator.score"""
return estimator.score(*args, **kwargs)
class _PassthroughScorer:
def __init__(self, estimator):
self._estimator = estimator

def __call__(self, estimator, *args, **kwargs):
"""Method that wraps estimator.score"""
return estimator.score(*args, **kwargs)

def get_metadata_routing(self):
"""Get requested data properties.

.. versionadded:: 1.1

Returns
-------
routing : MetadataRouter
A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
routing information.
"""
# This scorer doesn't do any validation or routing, it only exposes the
# score requests to the parent object. This object behaves as a
# consumer rather than a router.
res = MetadataRequest(owner=self._estimator.__class__.__name__)
res.score = get_routing_for_object(self._estimator).score
return res


def check_scoring(estimator, scoring=None, *, allow_none=False):
Expand Down Expand Up @@ -481,7 +599,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
return get_scorer(scoring)
elif scoring is None:
if hasattr(estimator, "score"):
return _passthrough_scorer
return _PassthroughScorer(estimator)
elif allow_none:
return None
else:
Expand Down Expand Up @@ -614,7 +732,7 @@ def make_scorer(
----------
score_func : callable
Score function (or loss function) with signature
`score_func(y, y_pred, **kwargs)`.
``score_func(y, y_pred, **kwargs)``.

greater_is_better : bool, default=True
Whether `score_func` is a score function (default), meaning high is
Expand Down
Loading