From 1fbda386e8e647ff1e59067c2bd73ea2dbeb172f Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:25:48 +0100
Subject: [PATCH 01/16] FEAT add metadata routing support to scorers

---
 sklearn/metrics/_scorer.py                  | 159 ++++++++++++++------
 sklearn/metrics/tests/test_score_objects.py |  22 ++-
 sklearn/utils/metadata_routing.py           |   1 +
 3 files changed, 136 insertions(+), 46 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 1e8e330d1af81..4418f3637be8b 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -18,6 +18,7 @@
 #          Arnaud Joly
 # License: Simplified BSD
 
+import copy
 from collections.abc import Iterable
 from functools import partial
 from collections import Counter
@@ -61,6 +62,10 @@
 from ..utils.multiclass import type_of_target
 from ..base import is_regressor
 
+from ..utils.metadata_routing import _MetadataRequester
+from ..utils.metadata_routing import MetadataRequest
+from ..utils.metadata_routing import MetadataRouter
+from ..utils.metadata_routing import process_routing
 
 
 def _cached_call(cache, estimator, method, *args, **kwargs):
@@ -99,11 +104,15 @@ def __call__(self, estimator, *args, **kwargs):
         cache = {} if self._use_cache(estimator) else None
         cached_call = partial(_cached_call, cache)
 
+        params = process_routing(self, "score", kwargs)
+
         for name, scorer in self._scorers.items():
             if isinstance(scorer, _BaseScorer):
-                score = scorer._score(cached_call, estimator, *args, **kwargs)
+                score = scorer._score(
+                    cached_call, estimator, *args, **params.get(name).score
+                )
             else:
-                score = scorer(estimator, *args, **kwargs)
+                score = scorer(estimator, *args, **params.get(name).score)
             scores[name] = score
         return scores
 
@@ -138,8 +147,23 @@ def _use_cache(self, estimator):
                 return True
         return False
 
+    def get_metadata_routing(self):
+        """Get metadata routing of this object.
+
+        Please check :ref:`User Guide <metadata_routing>` on how the routing
+        mechanism works.
+
+        Returns
+        -------
+        routing : dict
+            A dict representing a MetadataRouter.
+        """
+        return MetadataRouter(owner=self.__class__.__name__).add(
+            **self._scorers, method_mapping="score"
+        )
+
 
-class _BaseScorer:
+class _BaseScorer(_MetadataRequester):
     def __init__(self, score_func, sign, kwargs):
         self._kwargs = kwargs
         self._score_func = score_func
@@ -191,7 +215,7 @@ def __repr__(self):
             kwargs_string,
         )
 
-    def __call__(self, estimator, X, y_true, sample_weight=None):
+    def __call__(self, estimator, X, y_true, **kwargs):
         """Evaluate predicted target values for X relative to y_true.
 
         Parameters
@@ -206,29 +230,45 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like of shape (n_samples,), default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
        -------
         score : float
             Score function applied to prediction of estimator on X.
         """
-        return self._score(
-            partial(_cached_call, None),
-            estimator,
-            X,
-            y_true,
-            sample_weight=sample_weight,
-        )
+        return self._score(partial(_cached_call, None), estimator, X, y_true, **kwargs)
 
     def _factory_args(self):
         """Return non-default make_scorer arguments for repr."""
         return ""
 
+    def set_score_request(self, **kwargs):
+        """Set requested parameters by the scorer.
+
+        Note that this method returns a new instance of the scorer, and does
+        **not** change the original scorer object.
+
+        .. versionadded:: 1.1
+
+        Parameters
+        ----------
+        kwargs : dict
+            Arguments should be of the form param_name={True, False, None, str}.
+            The value can also be of the form RequestType
+        """
+        res = copy.deepcopy(self)
+        res._metadata_request = MetadataRequest(owner=self.__class__.__name__)
+        for param, alias in kwargs.items():
+            res._metadata_request.score.add_request(param=param, alias=alias)
+        return res
+
 
 class _PredictScorer(_BaseScorer):
-    def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
+    def _score(self, method_caller, estimator, X, y_true, **kwargs):
         """Evaluate predicted target values for X relative to y_true.
 
         Parameters
@@ -247,8 +287,10 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like of shape (n_samples,), default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
         -------
@@ -257,16 +299,13 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
         """
 
         y_pred = method_caller(estimator, "predict", X)
-        if sample_weight is not None:
-            return self._sign * self._score_func(
-                y_true, y_pred, sample_weight=sample_weight, **self._kwargs
-            )
-        else:
-            return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
+        scoring_kwargs = copy.deepcopy(self._kwargs)
+        scoring_kwargs.update(kwargs)
+        return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
 
 
 class _ProbaScorer(_BaseScorer):
-    def _score(self, method_caller, clf, X, y, sample_weight=None):
+    def _score(self, method_caller, clf, X, y, **kwargs):
         """Evaluate predicted probabilities for X relative to y_true.
 
         Parameters
@@ -286,8 +325,10 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
             Gold standard target values for X. These must be class labels,
             not probabilities.
 
-        sample_weight : array-like, default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
         -------
@@ -302,19 +343,23 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
             # problem: (when only 2 class are given to `y_true` during scoring)
             # Thus, we need to check for the shape of `y_pred`.
             y_pred = self._select_proba_binary(y_pred, clf.classes_)
-        if sample_weight is not None:
-            return self._sign * self._score_func(
-                y, y_pred, sample_weight=sample_weight, **self._kwargs
-            )
-        else:
-            return self._sign * self._score_func(y, y_pred, **self._kwargs)
+
+        scoring_kwargs = copy.deepcopy(self._kwargs)
+        scoring_kwargs.update(kwargs)
+        # this is for backward compatibility to avoid passing sample_weight
+        # to the scorer if it's None
+        # probably remove in v1.3
+        if scoring_kwargs.get("sample_weight", -1) is None:
+            del scoring_kwargs["sample_weight"]
+
+        return self._sign * self._score_func(y, y_pred, **scoring_kwargs)
 
     def _factory_args(self):
         return ", needs_proba=True"
 
 
 class _ThresholdScorer(_BaseScorer):
-    def _score(self, method_caller, clf, X, y, sample_weight=None):
+    def _score(self, method_caller, clf, X, y, **kwargs):
         """Evaluate decision function output for X relative to y_true.
 
         Parameters
@@ -336,8 +381,10 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
             Gold standard target values for X. These must be class labels,
             not decision function values.
 
-        sample_weight : array-like, default=None
-            Sample weights.
+        **kwargs : dict
+            Other parameters passed to the scorer.
+
+            .. versionadded:: 1.1
 
         Returns
         -------
@@ -374,12 +421,14 @@ def _score(self, method_caller, clf, X, y, sample_weight=None):
         elif isinstance(y_pred, list):
             y_pred = np.vstack([p[:, -1] for p in y_pred]).T
 
-        if sample_weight is not None:
-            return self._sign * self._score_func(
-                y, y_pred, sample_weight=sample_weight, **self._kwargs
-            )
-        else:
-            return self._sign * self._score_func(y, y_pred, **self._kwargs)
+        scoring_kwargs = copy.deepcopy(self._kwargs)
+        scoring_kwargs.update(kwargs)
+        # this is for backward compatibility to avoid passing sample_weight
+        # to the scorer if it's None
+        # probably remove in v1.3
+        if scoring_kwargs.get("sample_weight", -1) is None:
+            del scoring_kwargs["sample_weight"]
+        return self._sign * self._score_func(y, y_pred, **scoring_kwargs)
 
     def _factory_args(self):
         return ", needs_threshold=True"
@@ -414,9 +463,29 @@ def get_scorer(scoring):
     return scorer
 
 
-def _passthrough_scorer(estimator, *args, **kwargs):
-    """Function that wraps estimator.score"""
-    return estimator.score(*args, **kwargs)
+class _passthrough_scorer:
+    def __init__(self, estimator):
+        self._estimator = estimator
+
+    def __call__(self, estimator, *args, **kwargs):
+        """Method that wraps estimator.score"""
+        return estimator.score(*args, **kwargs)
+
+    def get_metadata_request(self):
+        """Get requested data properties.
+
+        .. versionadded:: 1.1
+
+        Returns
+        -------
+        request : dict
+            A dict of dict of str->value. The key to the first dict is the name
+            of the method, and the key to the second dict is the name of the
+            argument requested by the method.
+        """
+        return MetadataRouter(owner=self.__class__.__name__).add(
+            estimator=self._estimator, method_mapping="score"
+        )
 
 
 def check_scoring(estimator, scoring=None, *, allow_none=False):
@@ -471,7 +540,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
         return get_scorer(scoring)
     elif scoring is None:
         if hasattr(estimator, "score"):
-            return _passthrough_scorer
+            return _passthrough_scorer(estimator)
         elif allow_none:
             return None
         else:
@@ -604,7 +673,7 @@ def make_scorer(
     ----------
     score_func : callable
         Score function (or loss function) with signature
-        `score_func(y, y_pred, **kwargs)`.
+        ``score_func(y, y_pred, **kwargs)``.
 
     greater_is_better : bool, default=True
         Whether `score_func` is a score function (default), meaning high is
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 3b3caceb9970a..7b21fac04c02c 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -236,7 +236,7 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator):
     estimator = EstimatorWithFitAndScore()
     estimator.fit([[1]], [1])
     scorer = scoring_validator(estimator)
-    assert scorer is _passthrough_scorer
+    assert isinstance(scorer, _passthrough_scorer)
     assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)
 
     estimator = EstimatorWithFitAndPredict()
@@ -629,11 +629,14 @@ def test_classification_scorer_sample_weight():
         else:
             target = y_test
         try:
+            scorer = scorer.set_score_request(sample_weight=True)
             weighted = scorer(
                 estimator[name], X_test, target, sample_weight=sample_weight
             )
             ignored = scorer(estimator[name], X_test[10:], target[10:])
             unweighted = scorer(estimator[name], X_test, target)
+            # this should not raise. sample_weight should be ignored if None.
+            _ = scorer(estimator[name], X_test[:10], target[:10], sample_weight=None)
             assert weighted != unweighted, (
                 f"scorer {name} behaves identically when called with "
                 f"sample weights: {weighted} vs {unweighted}"
@@ -677,6 +680,7 @@ def test_regression_scorer_sample_weight():
             # skip classification scorers
             continue
         try:
+            scorer = scorer.set_score_request(sample_weight=True)
             weighted = scorer(reg, X_test, y_test, sample_weight=sample_weight)
             ignored = scorer(reg, X_test[11:], y_test[11:])
             unweighted = scorer(reg, X_test, y_test)
@@ -1140,3 +1144,19 @@ def test_scorer_no_op_multiclass_select_proba():
         labels=lr.classes_,
     )
     scorer(lr, X_test, y_test)
+
+
+@pytest.mark.parametrize("name, scorer", SCORERS.items())
+def test_scorer_metadata_request(name, scorer):
+    assert hasattr(scorer, "set_score_request")
+    assert hasattr(scorer, "get_metadata_routing")
+
+    weighted_scorer = scorer.set_score_request(sample_weight=True)
+    # set_score_request shouldn't mutate the instance
+    assert weighted_scorer is not scorer
+
+    assert str(scorer.get_metadata_routing()) == "{}"
+    assert (
+        str(weighted_scorer.get_metadata_routing())
+        == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
+    )
diff --git a/sklearn/utils/metadata_routing.py b/sklearn/utils/metadata_routing.py
index 1b3ba6e33b1f7..89ba5b5a9253c 100644
--- a/sklearn/utils/metadata_routing.py
+++ b/sklearn/utils/metadata_routing.py
@@ -14,3 +14,4 @@
 from ._metadata_requests import MetadataRequest  # noqa
 from ._metadata_requests import MethodMapping  # noqa
 from ._metadata_requests import process_routing  # noqa
+from ._metadata_requests import _MetadataRequester  # noqa

From 51baf9d277bc0498d104880e9a54cf3c598d1eef Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:29:01 +0100
Subject: [PATCH 02/16] clarify doc on not mutating scorers

---
 doc/metadata_routing.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst
index c6641ada487db..0e20f73966bf7 100644
--- a/doc/metadata_routing.rst
+++ b/doc/metadata_routing.rst
@@ -46,8 +46,10 @@ Weighted scoring and fitting
 ----------------------------
 
 Here ``GroupKFold`` requests ``groups`` by default. However, we need to
-explicitly request weights in ``make_scorer`` and for ``LogisticRegressionCV``.
-Both of these *consumers* know how to use metadata called ``"sample_weight"``::
+explicitly request weights for our scorer and for ``LogisticRegressionCV``.
+Note that ``set_score_request`` on a scorer does not mutate the instance and
+instead returns a new instance with the metadata request attached to it. Both
+of these *consumers* know how to use metadata called ``"sample_weight"``::
 
     >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
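
The request mechanism introduced by the two patches above can be exercised
directly. The following is a minimal sketch (not part of the series) that
assumes a scikit-learn build with these patches applied and uses only calls
that appear in the diffs above::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, make_scorer

    X, y = make_classification(random_state=0)
    lr = LogisticRegression().fit(X, y)
    sample_weight = np.abs(np.random.RandomState(0).randn(len(y)))

    # At this point in the series, set_score_request returns a *new* scorer
    # instance and leaves the original untouched.
    weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True)
    print(weighted_acc.get_metadata_routing())  # records the 'sample_weight' request
    # The scorer forwards the metadata to accuracy_score as a keyword argument.
    print(weighted_acc(lr, X, y, sample_weight=sample_weight))
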
From 78638c40a0d4161c7db91131fbe1ae1bbd386b67 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:40:44 +0100
Subject: [PATCH 03/16] improve tests

---
 sklearn/metrics/tests/test_score_objects.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 7b21fac04c02c..79e49bc68b4d1 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -15,6 +15,8 @@
 from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.metadata_routing import MetadataRouter
+from sklearn.utils.metadata_routing import process_routing
 
 from sklearn.base import BaseEstimator
 from sklearn.metrics import (
@@ -1160,3 +1162,18 @@ def test_scorer_metadata_request(name, scorer):
         str(weighted_scorer.get_metadata_routing())
         == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
     )
+
+    # make sure putting the scorer in a router doesn't request anything
+    router = MetadataRouter(owner="test").add(scorer=scorer, method_mapping="score")
+    with pytest.raises(TypeError, match="got unexpected argument"):
+        router.validate_metadata(params={"sample_weight": 1}, method="score")
+    routed_params = router.route_params(params={"sample_weight": 1}, caller="score")
+    assert not routed_params.scorer.score
+
+    # make sure putting weighted_scorer in a router requests sample_weight
+    router = MetadataRouter(owner="test").add(
+        scorer=weighted_scorer, method_mapping="score"
+    )
+    router.validate_metadata(params={"sample_weight": 1}, method="score")
+    routed_params = router.route_params(params={"sample_weight": 1}, caller="score")
+    assert list(routed_params.scorer.score.keys()) == ["sample_weight"]

From ea317bc27e5bab252dd8304ddea1f9b16fb0d789 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 10 Mar 2022 22:44:10 +0100
Subject: [PATCH 04/16] remove unused import

---
 sklearn/metrics/tests/test_score_objects.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 79e49bc68b4d1..02ca8352d477b 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -16,7 +16,6 @@
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import ignore_warnings
 from sklearn.utils.metadata_routing import MetadataRouter
-from sklearn.utils.metadata_routing import process_routing
 
 from sklearn.base import BaseEstimator
 from sklearn.metrics import (

From 6c6c2f1044e74283a70c1fd720baa09effad5e46 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Fri, 11 Mar 2022 10:49:00 +0100
Subject: [PATCH 05/16] fix tests

---
 sklearn/inspection/_permutation_importance.py | 2 +-
 sklearn/model_selection/tests/test_search.py  | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py
index 7ca586efe8630..70b9c89c62126 100644
--- a/sklearn/inspection/_permutation_importance.py
+++ b/sklearn/inspection/_permutation_importance.py
@@ -15,7 +15,7 @@
 
 def _weights_scorer(scorer, estimator, X, y, sample_weight):
     if sample_weight is not None:
-        return scorer(estimator, X, y, sample_weight)
+        return scorer(estimator, X, y, sample_weight=sample_weight)
     return scorer(estimator, X, y)
 
 
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index a4dcc201c0bb1..4611669b2b974 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1902,7 +1902,13 @@ def _run_search(self, evaluate):
             attr[0].islower()
             and attr[-1:] == "_"
             and attr
-            not in {"cv_results_", "best_estimator_", "refit_time_", "classes_"}
+            not in {
+                "cv_results_",
+                "best_estimator_",
+                "refit_time_",
+                "classes_",
+                "scorer_",
+            }
         ):
             assert getattr(gscv, attr) == getattr(mycv, attr), (
                 "Attribute %s not equal" % attr
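
PATCH 05's one-word-looking change in ``_weights_scorer`` matters because
PATCH 01 changed the scorer signature from ``(estimator, X, y_true,
sample_weight=None)`` to ``(estimator, X, y_true, **kwargs)``: a fourth
positional argument no longer binds to ``sample_weight``. A standalone sketch
of the failure mode (the ``scorer`` function below is a hypothetical stand-in
for the new signature)::

    def scorer(estimator, X, y_true, **kwargs):
        # mimics the new _BaseScorer.__call__ signature from PATCH 01
        return kwargs.get("sample_weight")

    print(scorer(None, [[1]], [1], sample_weight=[2.0]))  # -> [2.0]
    # scorer(None, [[1]], [1], [2.0]) would instead raise:
    # TypeError: scorer() takes 3 positional arguments but 4 were given
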
""" return MetadataRouter(owner=self.__class__.__name__).add( estimator=self._estimator, method_mapping="score" diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 02ca8352d477b..749cedc25bff5 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -681,7 +681,6 @@ def test_regression_scorer_sample_weight(): # skip classification scorers continue try: - scorer = scorer.set_score_request(sample_weight=True) weighted = scorer(reg, X_test, y_test, sample_weight=sample_weight) ignored = scorer(reg, X_test[11:], y_test[11:]) unweighted = scorer(reg, X_test, y_test) @@ -1163,7 +1162,7 @@ def test_scorer_metadata_request(name, scorer): ) # make sure putting the scorer in a router doesn't request anything - router = MetadataRouter(owner="test").add(scorer=scorer, method_mapping="score") + router = MetadataRouter(owner="test").add(method_mapping="score", scorer=scorer) with pytest.raises(TypeError, match="got unexpected argument"): router.validate_metadata(params={"sample_weight": 1}, method="score") routed_params = router.route_params(params={"sample_weight": 1}, caller="score") diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 5719750ee5d58..9f6527f6809bf 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1112,8 +1112,9 @@ def get_metadata_routing(self): Returns ------- - routing : dict - A dict representing a MetadataRouter. + routing : MetadataRequest + A :class:`~utils.metadata_routing.MetadataRequest` encapsulating + routing information. """ return self._get_metadata_request() From a663b869f7455ead8bc144dbeb3e5d9d10665573 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 15:15:47 +0100 Subject: [PATCH 07/16] set_score_request -> with_score_request on scorers --- doc/metadata_routing.rst | 21 ++++++++++++--------- sklearn/metrics/_scorer.py | 4 ++-- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 0e20f73966bf7..0ffb21c2fc9bd 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -12,9 +12,10 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass metadata to a method such as ``fit`` or ``score``, the object accepting the metadata, must *request* it. For estimators and splitters this is done via ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this -is done via ``set_score_request`` method. For grouped splitters such as -``GroupKFold`` a ``groups`` parameter is requested by default. This is best -demonstrated by the following examples. +is done via ``with_score_request`` method which creates a new scorer with the +requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups`` +parameter is requested by default. This is best demonstrated by the following +examples. If you are developing a scikit-learn compatible estimator or meta-estimator, you can check our related developer guide: @@ -47,11 +48,11 @@ Weighted scoring and fitting Here ``GroupKFold`` requests ``groups`` by default. However, we need to explicitly request weights for our scorer and for ``LogisticRegressionCV``. 
-Note that ``set_score_request`` on a scorer does not mutate the instance and +Note that ``with_score_request`` on a scorer does not mutate the instance and instead returns a new instance with the metadata request attached to it. Both of these *consumers* know how to use metadata called ``"sample_weight"``:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -83,7 +84,7 @@ configure :class:`~linear_model.LogisticRegressionCV` to not request sample weights, so that :func:`~model_selection.cross_validate` does not pass the weights along:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -113,7 +114,7 @@ Unweighted feature selection Unlike ``LogisticRegressionCV``, ``SelectKBest`` doesn't accept weights and therefore `"sample_weight"` is not routed to it:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight=True ... ) >>> lr = LogisticRegressionCV( @@ -138,7 +139,7 @@ Despite ``make_scorer`` and ``LogisticRegressionCV`` both expecting the key consumers. In this example, we pass ``scoring_weight`` to the scorer, and ``fitting_weight`` to ``LogisticRegressionCV``:: - >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + >>> weighted_acc = make_scorer(accuracy_score).with_score_request( ... sample_weight="scoring_weight" ... ) >>> lr = LogisticRegressionCV( @@ -190,7 +191,9 @@ supports ``sample_weight`` in ``fit`` and ``score``, it exposes ``my_weights`` is done at the router level, and not by the object, e.g. estimator, itself. -For the scorers, this is done the same way, using ``set_score_request`` method. +For the scorers, this is done using ``with_score_request`` method which returns +a new instance of the scorer with the requested metadata and does not change +the original scorer. If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata request for all objects which potentially can accept ``sample_weight`` should diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 290ec24cf2141..34f8562b6b301 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -247,8 +247,8 @@ def _factory_args(self): """Return non-default make_scorer arguments for repr.""" return "" - def set_score_request(self, **kwargs): - """Set requested parameters by the scorer. + def with_score_request(self, **kwargs): + """Create a scorer that requests metadata parameters. Note that this method returns a new instance of the scorer, and does **not** change the original scorer object. 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 749cedc25bff5..743701cc5fb85 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -630,7 +630,7 @@ def test_classification_scorer_sample_weight(): else: target = y_test try: - scorer = scorer.set_score_request(sample_weight=True) + scorer = scorer.with_score_request(sample_weight=True) weighted = scorer( estimator[name], X_test, target, sample_weight=sample_weight ) @@ -1148,11 +1148,11 @@ def test_scorer_no_op_multiclass_select_proba(): @pytest.mark.parametrize("name, scorer", SCORERS.items()) def test_scorer_metadata_request(name, scorer): - assert hasattr(scorer, "set_score_request") + assert hasattr(scorer, "with_score_request") assert hasattr(scorer, "get_metadata_routing") - weighted_scorer = scorer.set_score_request(sample_weight=True) - # set_score_request shouldn't mutate the instance + weighted_scorer = scorer.with_score_request(sample_weight=True) + # with_score_request shouldn't mutate the instance assert weighted_scorer is not scorer assert str(scorer.get_metadata_routing()) == "{}" From e838a714d4b262932d804058e640464803c9b702 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 18:38:05 +0100 Subject: [PATCH 08/16] warn on overlapping kwargs and metadata --- sklearn/metrics/_scorer.py | 53 +++++++++++++++++++-- sklearn/metrics/tests/test_score_objects.py | 19 ++++++++ 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 34f8562b6b301..58be34268750b 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -19,6 +19,7 @@ # License: Simplified BSD import copy +import warnings from collections.abc import Iterable from functools import partial from collections import Counter @@ -247,6 +248,19 @@ def _factory_args(self): """Return non-default make_scorer arguments for repr.""" return "" + def _warn_overlap(self, message, kwargs): + """Warn if there is any overlap between ``self._kwargs`` and kwargs. + + This method is intended to be used to check for overlap between + ``self._kwargs`` and ``kwargs`` passed as metadata. + """ + _kwargs = set() if self._kwargs is None else set(self._kwargs.keys()) + overlap = _kwargs.intersection(kwargs.keys()) + if overlap: + warnings.warn( + f"{message} Overlapping parameters are: {overlap}", UserWarning + ) + def with_score_request(self, **kwargs): """Create a scorer that requests metadata parameters. @@ -258,9 +272,19 @@ def with_score_request(self, **kwargs): Parameters ---------- kwargs : dict - Arguments should be of the form param_name={True, False, None, str}. - The value can also be of the form RequestType + Arguments should be of the form ``param_name=alias``, and `alias` + can be either one of ``{True, False, None, str}`` or an instance of + RequestType. """ + self._warn_overlap( + message=( + "You are setting metadata request for parameters which are " + "already set as kwargs for this metric. These set values will be " + "overridden by passed metadata if provided. Please pass them either " + "as metadata or kwargs to `make_scorer`." + ), + kwargs=kwargs, + ) res = copy.deepcopy(self) res._metadata_request = MetadataRequest(owner=self.__class__.__name__) for param, alias in kwargs.items(): @@ -298,7 +322,14 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): score : float Score function applied to prediction of estimator on X. 
""" - + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_pred = method_caller(estimator, "predict", X) scoring_kwargs = copy.deepcopy(self._kwargs) scoring_kwargs.update(kwargs) @@ -336,6 +367,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): score : float Score function applied to prediction of estimator on X. """ + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_type = type_of_target(y) y_pred = method_caller(clf, "predict_proba", X) @@ -392,6 +431,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): score : float Score function applied to prediction of estimator on X. """ + self._warn_overlap( + message=( + "There is an overlap between set kwargs of this scorer instance and" + " passed metadata. Please pass them either as kwargs to `make_scorer`" + " or metadata, but not both." + ), + kwargs=kwargs, + ) y_type = type_of_target(y) if y_type not in ("binary", "multilabel-indicator"): diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 743701cc5fb85..2962fff8537db 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1175,3 +1175,22 @@ def test_scorer_metadata_request(name, scorer): router.validate_metadata(params={"sample_weight": 1}, method="score") routed_params = router.route_params(params={"sample_weight": 1}, caller="score") assert list(routed_params.scorer.score.keys()) == ["sample_weight"] + + +def test_metadata_kwarg_conflict(): + X, y = make_classification( + n_classes=3, n_informative=3, n_samples=20, random_state=0 + ) + lr = LogisticRegression().fit(X, y) + + scorer = make_scorer( + roc_auc_score, + needs_proba=True, + multi_class="ovo", + labels=lr.classes_, + ) + with pytest.warns(UserWarning, match="already set as kwargs"): + scorer = scorer.with_score_request(labels=True) + + with pytest.warns(UserWarning, match="There is an overlap"): + scorer(lr, X, y, labels=lr.classes_) From 0c6e2c454df2c2a58c904c8e9dee42f8e38ea3e8 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 18:45:48 +0100 Subject: [PATCH 09/16] add references to docs --- sklearn/metrics/_scorer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 58be34268750b..4b1f3a093b68a 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -234,6 +234,7 @@ def __call__(self, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 @@ -267,6 +268,9 @@ def with_score_request(self, **kwargs): Note that this method returns a new instance of the scorer, and does **not** change the original scorer object. + Please see :ref:`User Guide ` on how the routing + mechanism works. + .. versionadded:: 1.1 Parameters @@ -314,6 +318,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. 
versionadded:: 1.1 @@ -359,6 +364,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 @@ -423,6 +429,7 @@ def _score(self, method_caller, clf, X, y, **kwargs): **kwargs : dict Other parameters passed to the scorer, e.g. sample_weight. + Refer to :func:`with_score_request` for more details. .. versionadded:: 1.1 From 0c61c05d3c66961d49b94129ccefc60afa955fb7 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 15 Mar 2022 19:08:42 +0100 Subject: [PATCH 10/16] add a note on custom scorers --- doc/modules/model_evaluation.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 5b3604d403f3b..4a194adf1fa7d 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -224,6 +224,13 @@ the following two rules: Again, by convention higher numbers are better, so if your scorer returns loss, that value should be negated. +- If it requires extra metadata to be passed to it, it should expose a + ``get_metadata_routing`` method returning the requested metadata. The user + should be able to set the requested metadata via a ``with_score_request`` + method returning a copy of the scorer with the appropriate requests set. + Please see :ref:`User Guide ` for more details. + + .. note:: **Using custom scorers in functions where n_jobs > 1** While defining the custom scoring function alongside the calling function @@ -563,8 +570,8 @@ or *informedness*. Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, and Case Studies `_, 2015. - .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description - and evaluation of a scalable learning classifier + .. [Urbanowicz2015] Urbanowicz R.J., Moore, J.H. :doi:`ExSTraCS 2.0: description + and evaluation of a scalable learning classifier system <10.1007/s12065-015-0128-8>`, Evol. Intel. (2015) 8: 89. .. _cohen_kappa: From ba36307d611d790876a7320dfe95f6441ed31ce1 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 22 Mar 2022 16:15:11 +0100 Subject: [PATCH 11/16] Revert "set_score_request -> with_score_request on scorers" This reverts commit a663b869f7455ead8bc144dbeb3e5d9d10665573. --- doc/metadata_routing.rst | 21 +++++++++------------ sklearn/metrics/_scorer.py | 6 ++---- sklearn/metrics/tests/test_score_objects.py | 8 ++++---- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 0ffb21c2fc9bd..0e20f73966bf7 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -12,10 +12,9 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass metadata to a method such as ``fit`` or ``score``, the object accepting the metadata, must *request* it. For estimators and splitters this is done via ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this -is done via ``with_score_request`` method which creates a new scorer with the -requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups`` -parameter is requested by default. This is best demonstrated by the following -examples. +is done via ``set_score_request`` method. For grouped splitters such as +``GroupKFold`` a ``groups`` parameter is requested by default. This is best +demonstrated by the following examples. 
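
The overlap warning added in PATCH 08 can be triggered exactly as in
``test_metadata_kwarg_conflict``. A sketch assuming a build with patches 01-10
applied (at this point in the series the method is still called
``with_score_request``)::

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import make_scorer, roc_auc_score

    X, y = make_classification(
        n_classes=3, n_informative=3, n_samples=20, random_state=0
    )
    lr = LogisticRegression().fit(X, y)
    scorer = make_scorer(
        roc_auc_score, needs_proba=True, multi_class="ovo", labels=lr.classes_
    )
    # 'labels' is already bound as a make_scorer kwarg, so requesting it as
    # metadata emits the "already set as kwargs" UserWarning:
    scorer = scorer.with_score_request(labels=True)
    # and passing it again at call time emits the "There is an overlap" one:
    scorer(lr, X, y, labels=lr.classes_)
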
From ba36307d611d790876a7320dfe95f6441ed31ce1 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Tue, 22 Mar 2022 16:15:11 +0100
Subject: [PATCH 11/16] Revert "set_score_request -> with_score_request on
 scorers"

This reverts commit a663b869f7455ead8bc144dbeb3e5d9d10665573.

---
 doc/metadata_routing.rst                    | 21 +++++++++------------
 sklearn/metrics/_scorer.py                  |  6 ++----
 sklearn/metrics/tests/test_score_objects.py |  8 ++++----
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst
index 0ffb21c2fc9bd..0e20f73966bf7 100644
--- a/doc/metadata_routing.rst
+++ b/doc/metadata_routing.rst
@@ -12,10 +12,9 @@ meta-estimators such as ``Pipeline`` and ``GridSearchCV``. In order to pass
 metadata to a method such as ``fit`` or ``score``, the object accepting the
 metadata, must *request* it. For estimators and splitters this is done via
 ``set_*_request`` methods, e.g. ``set_fit_request(...)``, and for scorers this
-is done via ``with_score_request`` method which creates a new scorer with the
-requested metadata. For grouped splitters such as ``GroupKFold`` a ``groups``
-parameter is requested by default. This is best demonstrated by the following
-examples.
+is done via ``set_score_request`` method. For grouped splitters such as
+``GroupKFold`` a ``groups`` parameter is requested by default. This is best
+demonstrated by the following examples.
 
 If you are developing a scikit-learn compatible estimator or meta-estimator,
 you can check our related developer guide:
@@ -48,11 +47,11 @@ Weighted scoring and fitting
 
 Here ``GroupKFold`` requests ``groups`` by default. However, we need to
 explicitly request weights for our scorer and for ``LogisticRegressionCV``.
-Note that ``with_score_request`` on a scorer does not mutate the instance and
+Note that ``set_score_request`` on a scorer does not mutate the instance and
 instead returns a new instance with the metadata request attached to it. Both
 of these *consumers* know how to use metadata called ``"sample_weight"``::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
     ... )
     >>> lr = LogisticRegressionCV(
@@ -84,7 +83,7 @@ configure :class:`~linear_model.LogisticRegressionCV` to not request sample
 weights, so that :func:`~model_selection.cross_validate` does not pass the
 weights along::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
     ... )
     >>> lr = LogisticRegressionCV(
@@ -114,7 +113,7 @@ Unweighted feature selection
 Unlike ``LogisticRegressionCV``, ``SelectKBest`` doesn't accept weights and
 therefore `"sample_weight"` is not routed to it::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
    ...     sample_weight=True
     ... )
     >>> lr = LogisticRegressionCV(
@@ -139,7 +138,7 @@ Despite ``make_scorer`` and ``LogisticRegressionCV`` both expecting the key
 consumers. In this example, we pass ``scoring_weight`` to the scorer, and
 ``fitting_weight`` to ``LogisticRegressionCV``::
 
-    >>> weighted_acc = make_scorer(accuracy_score).with_score_request(
+    >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight="scoring_weight"
     ... )
     >>> lr = LogisticRegressionCV(
@@ -191,9 +190,7 @@ supports ``sample_weight`` in ``fit`` and ``score``, it exposes ``my_weights``
 is done at the router level, and not by the object, e.g. estimator,
 itself.
 
-For the scorers, this is done using ``with_score_request`` method which returns
-a new instance of the scorer with the requested metadata and does not change
-the original scorer.
+For the scorers, this is done the same way, using ``set_score_request`` method.
 
 If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata
 request for all objects which potentially can accept ``sample_weight`` should
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 58f5f72c145dc..e6be7d34673ca 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -25,8 +25,6 @@
 from collections import Counter
 
 import numpy as np
-import copy
-import warnings
 
 from . import (
     r2_score,
@@ -264,8 +262,8 @@ def _warn_overlap(self, message, kwargs):
                 f"{message} Overlapping parameters are: {overlap}", UserWarning
             )
 
-    def with_score_request(self, **kwargs):
-        """Create a scorer that requests metadata parameters.
+    def set_score_request(self, **kwargs):
+        """Set requested parameters by the scorer.
 
         Note that this method returns a new instance of the scorer, and does
         **not** change the original scorer object.
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index bec4e24c4d751..b9c18f4c7782d 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -631,7 +631,7 @@ def test_classification_scorer_sample_weight():
         else:
             target = y_test
         try:
-            scorer = scorer.with_score_request(sample_weight=True)
+            scorer = scorer.set_score_request(sample_weight=True)
             weighted = scorer(
                 estimator[name], X_test, target, sample_weight=sample_weight
             )
@@ -1161,11 +1161,11 @@ def test_scorer_no_op_multiclass_select_proba():
 
 @pytest.mark.parametrize("name, scorer", SCORERS.items())
 def test_scorer_metadata_request(name, scorer):
-    assert hasattr(scorer, "with_score_request")
+    assert hasattr(scorer, "set_score_request")
     assert hasattr(scorer, "get_metadata_routing")
 
-    weighted_scorer = scorer.with_score_request(sample_weight=True)
-    # with_score_request shouldn't mutate the instance
+    weighted_scorer = scorer.set_score_request(sample_weight=True)
+    # set_score_request shouldn't mutate the instance
     assert weighted_scorer is not scorer
 
     assert str(scorer.get_metadata_routing()) == "{}"

From 2bea2036dfa8d2688046731f8474fb200f1ff629 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 10:14:32 +0100
Subject: [PATCH 12/16] set_score_request now mutates the instance

---
 doc/metadata_routing.rst                    |  4 +---
 doc/modules/model_evaluation.rst            |  5 ++---
 sklearn/metrics/_scorer.py                  | 18 +++++++-----------
 sklearn/metrics/tests/test_score_objects.py | 18 +++++++++++-------
 4 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst
index 0e20f73966bf7..de04ecf022415 100644
--- a/doc/metadata_routing.rst
+++ b/doc/metadata_routing.rst
@@ -47,9 +47,7 @@ Weighted scoring and fitting
 
 Here ``GroupKFold`` requests ``groups`` by default. However, we need to
 explicitly request weights for our scorer and for ``LogisticRegressionCV``.
-Note that ``set_score_request`` on a scorer does not mutate the instance and
-instead returns a new instance with the metadata request attached to it. Both
-of these *consumers* know how to use metadata called ``"sample_weight"``::
+Both of these *consumers* know how to use metadata called ``"sample_weight"``::
 
     >>> weighted_acc = make_scorer(accuracy_score).set_score_request(
     ...     sample_weight=True
diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index ea85973d5ee56..d6d21b3814918 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -227,9 +227,8 @@ the following two rules:
 
 - If it requires extra metadata to be passed to it, it should expose a
   ``get_metadata_routing`` method returning the requested metadata. The user
-  should be able to set the requested metadata via a ``with_score_request``
-  method returning a copy of the scorer with the appropriate requests set.
-  Please see :ref:`User Guide <metadata_routing>` for more details.
+  should be able to set the requested metadata via a ``set_score_request``
+  method. Please see :ref:`User Guide <metadata_routing>` for more details.
 
 
 .. note:: **Using custom scorers in functions where n_jobs > 1**
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index e6be7d34673ca..72a7e2a807c8f 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -234,7 +234,7 @@ def __call__(self, estimator, X, y_true, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
@@ -265,9 +265,6 @@ def _warn_overlap(self, message, kwargs):
     def set_score_request(self, **kwargs):
         """Set requested parameters by the scorer.
 
-        Note that this method returns a new instance of the scorer, and does
-        **not** change the original scorer object.
-
         Please see :ref:`User Guide <metadata_routing>` on how the routing
         mechanism works.
 
@@ -289,11 +286,10 @@ def set_score_request(self, **kwargs):
             ),
             kwargs=kwargs,
         )
-        res = copy.deepcopy(self)
-        res._metadata_request = MetadataRequest(owner=self.__class__.__name__)
+        self._metadata_request = MetadataRequest(owner=self.__class__.__name__)
         for param, alias in kwargs.items():
-            res._metadata_request.score.add_request(param=param, alias=alias)
-        return res
+            self._metadata_request.score.add_request(param=param, alias=alias)
+        return self
 
 
 class _PredictScorer(_BaseScorer):
@@ -318,7 +314,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
@@ -364,7 +360,7 @@ def _score(self, method_caller, clf, X, y, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
@@ -425,7 +425,7 @@ def _score(self, method_caller, clf, X, y, **kwargs):
 
         **kwargs : dict
             Other parameters passed to the scorer, e.g. sample_weight.
-            Refer to :func:`with_score_request` for more details.
+            Refer to :func:`set_score_request` for more details.
 
            .. versionadded:: 1.1
 
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index b9c18f4c7782d..85bf4f050f588 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -1159,23 +1159,27 @@ def test_scorer_no_op_multiclass_select_proba():
         scorer(lr, X_test, y_test)
 
 
-@pytest.mark.parametrize("name, scorer", SCORERS.items())
-def test_scorer_metadata_request(name, scorer):
+@pytest.mark.parametrize("name", get_scorer_names(), ids=get_scorer_names())
+def test_scorer_metadata_request(name):
+    scorer = get_scorer(name)
     assert hasattr(scorer, "set_score_request")
     assert hasattr(scorer, "get_metadata_routing")
 
+    assert str(scorer.get_metadata_routing()) == "{}"
+
     weighted_scorer = scorer.set_score_request(sample_weight=True)
-    # set_score_request shouldn't mutate the instance
-    assert weighted_scorer is not scorer
+    # set_score_request should mutate the instance
+    assert weighted_scorer is scorer
 
-    assert str(scorer.get_metadata_routing()) == "{}"
     assert (
         str(weighted_scorer.get_metadata_routing())
         == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
     )
 
     # make sure putting the scorer in a router doesn't request anything
-    router = MetadataRouter(owner="test").add(method_mapping="score", scorer=scorer)
+    router = MetadataRouter(owner="test").add(
+        method_mapping="score", scorer=get_scorer(name)
+    )
     with pytest.raises(TypeError, match="got unexpected argument"):
         router.validate_metadata(params={"sample_weight": 1}, method="score")
     routed_params = router.route_params(params={"sample_weight": 1}, caller="score")
@@ -1203,7 +1207,7 @@ def test_metadata_kwarg_conflict():
         labels=lr.classes_,
     )
     with pytest.warns(UserWarning, match="already set as kwargs"):
-        scorer = scorer.with_score_request(labels=True)
+        scorer.set_score_request(labels=True)
 
     with pytest.warns(UserWarning, match="There is an overlap"):
         scorer(lr, X, y, labels=lr.classes_)
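
After PATCH 12 the request-setting call mutates the scorer rather than
copying it, which the updated test pins down. A two-line sketch of the new
contract (assuming a build with patches up to 12 applied)::

    from sklearn.metrics import accuracy_score, make_scorer

    acc = make_scorer(accuracy_score)
    weighted = acc.set_score_request(sample_weight=True)
    assert weighted is acc  # in-place now, unlike the earlier copy-returning variant
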
From dba266f3222c17de3307a158d2e145d10ff2e5dd Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 10:17:40 +0100
Subject: [PATCH 13/16] don't test repr

---
 sklearn/metrics/tests/test_score_objects.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 85bf4f050f588..90d2820d339e9 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -15,7 +15,8 @@
 from sklearn.utils._testing import assert_almost_equal
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import ignore_warnings
-from sklearn.utils.metadata_routing import MetadataRouter
+from sklearn.utils.metadata_routing import MetadataRouter, RequestType
+from sklearn.tests.test_metadata_routing import assert_request_is_empty
 
 from sklearn.base import BaseEstimator
 from sklearn.metrics import (
@@ -1165,15 +1166,16 @@ def test_scorer_metadata_request(name):
     assert hasattr(scorer, "set_score_request")
     assert hasattr(scorer, "get_metadata_routing")
 
-    assert str(scorer.get_metadata_routing()) == "{}"
+    assert_request_is_empty(scorer.get_metadata_routing())
 
     weighted_scorer = scorer.set_score_request(sample_weight=True)
     # set_score_request should mutate the instance
     assert weighted_scorer is scorer
 
+    assert_request_is_empty(weighted_scorer.get_metadata_routing(), exclude="score")
     assert (
-        str(weighted_scorer.get_metadata_routing())
-        == "{'score': {'sample_weight': <RequestType.REQUESTED: True>}}"
+        weighted_scorer.get_metadata_routing().score.requests["sample_weight"]
+        == RequestType.REQUESTED
     )

From 203b4e4efa410be4597db2772f3be37a50d6d128 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 11:23:26 +0100
Subject: [PATCH 14/16] fix and test _passthrough_scorer

---
 sklearn/metrics/_scorer.py                  | 10 +++++++---
 sklearn/metrics/tests/test_score_objects.py | 13 +++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 72a7e2a807c8f..09a70cf7a393c 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -67,6 +67,7 @@
 from ..utils.metadata_routing import MetadataRequest
 from ..utils.metadata_routing import MetadataRouter
 from ..utils.metadata_routing import process_routing
+from ..utils.metadata_routing import get_routing_for_object
 
 
 def _cached_call(cache, estimator, method, *args, **kwargs):
@@ -541,9 +542,12 @@ def get_metadata_routing(self):
             A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
             routing information.
         """
-        return MetadataRouter(owner=self.__class__.__name__).add(
-            estimator=self._estimator, method_mapping="score"
-        )
+        # This scorer doesn't do any validation or routing, it only exposes the
+        # score requests to the parent object. This object behaves as a
+        # consumer rather than a router.
+        res = MetadataRequest(owner=self._estimator.__class__.__name__)
+        res.score = get_routing_for_object(self._estimator).score
+        return res
 
 
 def check_scoring(estimator, scoring=None, *, allow_none=False):
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 90d2820d339e9..5aafb5c6b610d 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -1213,3 +1213,16 @@ def test_metadata_kwarg_conflict():
 
     with pytest.warns(UserWarning, match="There is an overlap"):
         scorer(lr, X, y, labels=lr.classes_)
+
+
+def test_passthrough_scorer_metadata_request():
+    scorer = _passthrough_scorer(
+        estimator=LinearSVC()
+        .set_score_request(sample_weight="alias")
+        .set_fit_request(sample_weight=True)
+    )
+    # test that _passthrough_scorer leaves everything other than `score` empty
+    assert_request_is_empty(scorer.get_metadata_routing(), exclude="score")
+    # test that _passthrough_scorer doesn't behave like a router and leaves
+    # the request as is.
+    assert scorer.get_metadata_routing().score.requests["sample_weight"] == "alias"

From 4e32f318900ab43afb2fe28559e53be786ba21e9 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Wed, 23 Mar 2022 14:20:32 +0100
Subject: [PATCH 15/16] Joel's comments

---
 doc/modules/model_evaluation.rst            |  4 ++--
 sklearn/metrics/_scorer.py                  |  4 ++--
 sklearn/metrics/tests/test_score_objects.py | 12 ++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index d6d21b3814918..1e97e3b6cab6f 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -225,8 +225,8 @@ the following two rules:
   Again, by convention higher numbers are better, so if your scorer
   returns loss, that value should be negated.
 
-- If it requires extra metadata to be passed to it, it should expose a
-  ``get_metadata_routing`` method returning the requested metadata. The user
+- Advanced: If it requires extra metadata to be passed to it, it should expose
+  a ``get_metadata_routing`` method returning the requested metadata. The user
   should be able to set the requested metadata via a ``set_score_request``
   method. Please see :ref:`User Guide <metadata_routing>` for more details.
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 09a70cf7a393c..ffc12add29095 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -523,7 +523,7 @@ def get_scorer(scoring):
     return scorer
 
 
-class _passthrough_scorer:
+class _PassthroughScorer:
     def __init__(self, estimator):
         self._estimator = estimator
 
@@ -602,7 +602,7 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
         return get_scorer(scoring)
     elif scoring is None:
         if hasattr(estimator, "score"):
-            return _passthrough_scorer(estimator)
+            return _PassthroughScorer(estimator)
         elif allow_none:
             return None
         else:
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index 5aafb5c6b610d..187ffdb7368e3 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -39,7 +39,7 @@
 from sklearn.metrics import check_scoring
 from sklearn.metrics._scorer import (
     _PredictScorer,
-    _passthrough_scorer,
+    _PassthroughScorer,
     _MultimetricScorer,
     _check_multimetric_scoring,
 )
@@ -238,7 +238,7 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator):
     estimator = EstimatorWithFitAndScore()
     estimator.fit([[1]], [1])
     scorer = scoring_validator(estimator)
-    assert isinstance(scorer, _passthrough_scorer)
+    assert isinstance(scorer, _PassthroughScorer)
     assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)
 
     estimator = EstimatorWithFitAndPredict()
@@ -1215,14 +1215,14 @@ def test_metadata_kwarg_conflict():
         scorer(lr, X, y, labels=lr.classes_)
 
 
-def test_passthrough_scorer_metadata_request():
-    scorer = _passthrough_scorer(
+def test_PassthroughScorer_metadata_request():
+    scorer = _PassthroughScorer(
         estimator=LinearSVC()
         .set_score_request(sample_weight="alias")
         .set_fit_request(sample_weight=True)
     )
-    # test that _passthrough_scorer leaves everything other than `score` empty
+    # test that _PassthroughScorer leaves everything other than `score` empty
     assert_request_is_empty(scorer.get_metadata_routing(), exclude="score")
-    # test that _passthrough_scorer doesn't behave like a router and leaves
+    # test that _PassthroughScorer doesn't behave like a router and leaves
     # the request as is.
     assert scorer.get_metadata_routing().score.requests["sample_weight"] == "alias"

From 44f91cecd1c509a8b77bb30766dfd1332d55f2a8 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Thu, 24 Mar 2022 10:30:30 +0100
Subject: [PATCH 16/16] Thomas's comments

---
 sklearn/metrics/_scorer.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index ffc12add29095..bf345ff6a0344 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -333,8 +333,7 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs):
             kwargs=kwargs,
         )
         y_pred = method_caller(estimator, "predict", X)
-        scoring_kwargs = copy.deepcopy(self._kwargs)
-        scoring_kwargs.update(kwargs)
+        scoring_kwargs = {**self._kwargs, **kwargs}
         return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
 
 
@@ -387,11 +386,10 @@ def _score(self, method_caller, clf, X, y, **kwargs):
             # Thus, we need to check for the shape of `y_pred`.
             y_pred = self._select_proba_binary(y_pred, clf.classes_)
 
-        scoring_kwargs = copy.deepcopy(self._kwargs)
-        scoring_kwargs.update(kwargs)
+        scoring_kwargs = {**self._kwargs, **kwargs}
         # this is for backward compatibility to avoid passing sample_weight
         # to the scorer if it's None
-        # probably remove in v1.3
+        # TODO(1.3) Probably remove
         if scoring_kwargs.get("sample_weight", -1) is None:
             del scoring_kwargs["sample_weight"]
 
@@ -471,12 +471,11 @@ def _score(self, method_caller, clf, X, y, **kwargs):
         elif isinstance(y_pred, list):
             y_pred = np.vstack([p[:, -1] for p in y_pred]).T
 
-        scoring_kwargs = copy.deepcopy(self._kwargs)
-        scoring_kwargs.update(kwargs)
+        scoring_kwargs = {**self._kwargs, **kwargs}
         # this is for backward compatibility to avoid passing sample_weight
         # to the scorer if it's None
-        # probably remove in v1.3
+        # TODO(1.3) Probably remove
         if scoring_kwargs.get("sample_weight", -1) is None:
             del scoring_kwargs["sample_weight"]
         return self._sign * self._score_func(y, y_pred, **scoring_kwargs)
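
The merging idiom adopted in PATCH 16 is plain Python and can be
sanity-checked in isolation. In this standalone sketch,
``merge_scoring_kwargs`` is a hypothetical stand-in for the inlined logic; it
shows that call-time metadata overrides kwargs bound at ``make_scorer`` time
and that an explicit ``sample_weight=None`` is dropped for backward
compatibility::

    def merge_scoring_kwargs(bound_kwargs, call_kwargs):
        scoring_kwargs = {**bound_kwargs, **call_kwargs}  # call-time wins on overlap
        # backward compatibility: a sample_weight explicitly passed as None is
        # dropped rather than forwarded to the metric
        if scoring_kwargs.get("sample_weight", -1) is None:
            del scoring_kwargs["sample_weight"]
        return scoring_kwargs

    print(merge_scoring_kwargs({"beta": 2}, {"sample_weight": None}))   # {'beta': 2}
    print(merge_scoring_kwargs({"beta": 2}, {"sample_weight": [1.0]}))  # both keys kept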