ENH allow scorers to set additional parameters of scoring function #17962

Closed · wants to merge 20 commits

13 changes: 13 additions & 0 deletions doc/modules/model_evaluation.rst
@@ -202,6 +202,19 @@ Here is an example of building custom scorers, and of using the
>>> score(clf, X, y)
-0.69...

You can still set any of the additional scoring function parameters after the
scorer has been created, using the `set_kwargs` method::

>>> def my_custom_loss_func(y_true, y_pred, *, pulling_func=np.max):
... diff = pulling_func(np.abs(y_true - y_pred))
... return np.log1p(diff)
...
>>> score = make_scorer(my_custom_loss_func, greater_is_better=False)
>>> score(clf, X, y)
-0.69...
>>> _ = score.set_kwargs(pulling_func=np.mean)
>>> score(clf, X, y)
-0.40...
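
The same parameters can also be bound when the scorer is created, since
:func:`make_scorer` forwards extra keyword arguments to the scoring
function::

    >>> score = make_scorer(my_custom_loss_func, greater_is_better=False,
    ...                     pulling_func=np.mean)
    >>> score(clf, X, y)
    -0.40...

For illustration, passing a parameter that the scoring function does not
accept is expected to raise a ``ValueError``::

    >>> score.set_kwargs(not_a_param=1)
    Traceback (most recent call last):
    ...
    ValueError: Unknown parameters provided: ...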

.. _diy_scoring:

13 changes: 13 additions & 0 deletions doc/whats_new/v0.24.rst
@@ -244,6 +244,19 @@ Changelog
class to be used when computing the roc auc statistics.
:pr:`17651` by :user:`Clara Matos <claramatos>`.

- |Fix| Fixed a bug where :func:`metrics.get_scorer` did not return a copy of
the internally defined scikit-learn scorers, which also affected the
:func:`metrics.check_scoring` function.
:pr:`17962` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Enhancement| Add the possibility to set additional parameters of a scorer
created with :func:`metrics.make_scorer` using the `set_kwargs` method.
:pr:`17962` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Enhancement| Add the possibility to pass additional parameters required by
the scoring function when calling :func:`get_scorer`.
:pr:`17962` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.model_selection`
..............................

35 changes: 15 additions & 20 deletions sklearn/linear_model/_logistic.py
@@ -819,8 +819,8 @@ def grad(x, *args): return _logistic_loss_and_grad(x, *args)[1]


# helper function for LogisticCV
def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
scoring=None, fit_intercept=False,
def _log_reg_scoring_path(X, y, train, test, scoring, pos_class=None, Cs=10,
fit_intercept=False,
max_iter=100, tol=1e-4, class_weight=None,
verbose=0, solver='lbfgs', penalty='l2',
dual=False, intercept_scaling=1.,
@@ -843,6 +843,12 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
test : list of indices
The indices of the test set.

scoring : callable
A scorer callable object / function with signature
``scorer(estimator, X, y)``. For a list of scoring functions
that can be used, look at :mod:`sklearn.metrics`.

pos_class : int, default=None
The class with respect to which we perform a one-vs-all fit.
If None, then it is assumed that the given problem is binary.
@@ -853,13 +859,6 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
values are chosen in a logarithmic scale between 1e-4 and 1e4.
If not provided, then a fixed set of values for Cs are used.

scoring : callable, default=None
A string (see model evaluation documentation) or
a scorer callable object / function with signature
``scorer(estimator, X, y)``. For a list of scoring functions
that can be used, look at :mod:`sklearn.metrics`. The
default scoring option used is accuracy_score.

fit_intercept : bool, default=False
If False, then the bias term is set to zero. Else the last
term of each coef_ gives us the intercept.
@@ -989,7 +988,6 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,

scores = list()

scoring = get_scorer(scoring)
for w in coefs:
if multi_class == 'ovr':
w = w[np.newaxis, :]
Expand All @@ -1000,10 +998,7 @@ def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,
log_reg.coef_ = w
log_reg.intercept_ = 0.

if scoring is None:
scores.append(log_reg.score(X_test, y_test))
else:
scores.append(scoring(log_reg, X_test, y_test))
scores.append(scoring(log_reg, X_test, y_test))

return coefs, Cs, np.array(scores), n_iter

@@ -1557,7 +1552,7 @@ class LogisticRegressionCV(LogisticRegression,
a scorer callable object / function with signature
``scorer(estimator, X, y)``. For a list of scoring functions
that can be used, look at :mod:`sklearn.metrics`. The
default scoring option used is 'accuracy'.
default scoring option used is :func:`~sklearn.metrics.accuracy_score`.

solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \
default='lbfgs'
@@ -1875,6 +1870,9 @@ def fit(self, X, y, sample_weight=None):
class_weight, classes=np.arange(len(self.classes_)), y=y)
class_weight = dict(enumerate(class_weight))

scoring = "accuracy" if self.scoring is None else self.scoring
self._scorer = get_scorer(scoring)

path_func = delayed(_log_reg_scoring_path)

# The SAG solver releases the GIL so it's more efficient to use
@@ -1890,7 +1888,7 @@ def fit(self, X, y, sample_weight=None):
fit_intercept=self.fit_intercept, penalty=self.penalty,
dual=self.dual, solver=solver, tol=self.tol,
max_iter=self.max_iter, verbose=self.verbose,
class_weight=class_weight, scoring=self.scoring,
class_weight=class_weight, scoring=self._scorer,
multi_class=multi_class,
intercept_scaling=self.intercept_scaling,
random_state=self.random_state,
@@ -2082,10 +2080,7 @@ def score(self, X, y, sample_weight=None):
Score of self.predict(X) wrt. y.

"""
scoring = self.scoring or 'accuracy'
scoring = get_scorer(scoring)

return scoring(self, X, y, sample_weight=sample_weight)
return self._scorer(self, X, y, sample_weight=sample_weight)

def _more_tags(self):
return {
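
Summary note: the refactor above resolves the scorer exactly once in `fit` and
caches it as `self._scorer`, which `score` then reuses. A condensed sketch of
the pattern (standalone, with a hypothetical estimator name)::

    from sklearn.metrics import get_scorer

    class _CVEstimatorSketch:
        """Hypothetical estimator illustrating the scorer-caching pattern."""

        def __init__(self, scoring=None):
            self.scoring = scoring

        def fit(self, X, y):
            # resolve the scoring argument a single time; the parallel
            # per-fold workers then receive the resolved callable instead
            # of re-resolving a string for every fold
            scoring = "accuracy" if self.scoring is None else self.scoring
            self._scorer = get_scorer(scoring)
            # ... actual fitting would happen here ...
            return self

        def score(self, X, y, sample_weight=None):
            # reuse the scorer resolved during fit
            return self._scorer(self, X, y, sample_weight=sample_weight)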
6 changes: 3 additions & 3 deletions sklearn/linear_model/tests/test_logistic.py
@@ -122,14 +122,14 @@ def __call__(self, model, X, y, sample_weight=None):
assert lr.C_[0] == Cs[2]

# scorer called 8 times (cv*len(Cs))
assert mock_scorer.calls == cv * len(Cs)
assert lr._scorer.calls == cv * len(Cs)

# reset mock_scorer
mock_scorer.calls = 0
lr._scorer.calls = 0
custom_score = lr.score(X, lr.predict(X))

assert custom_score == mock_scorer.scores[0]
assert mock_scorer.calls == 1
assert lr._scorer.calls == 1


def test_logistic_cv_score_does_not_warn_by_default():
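
Note for reviewers: the updated assertions work because `get_scorer` returns a
callable `scoring` argument unchanged, so `lr._scorer` is the very mock
instance that was passed to `LogisticRegressionCV`. A rough reconstruction of
such a call-counting mock scorer (hypothetical; the actual test fixture may
differ)::

    from sklearn.metrics import accuracy_score

    class MockScorer:
        """Scorer-like callable that records how often it is invoked."""

        def __init__(self):
            self.calls = 0
            self.scores = []

        def __call__(self, model, X, y, sample_weight=None):
            self.calls += 1
            score = accuracy_score(y, model.predict(X))
            self.scores.append(score)
            return score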
99 changes: 92 additions & 7 deletions sklearn/metrics/_scorer.py
@@ -18,9 +18,11 @@
# Arnaud Joly <arnaud.v.joly@gmail.com>
# License: Simplified BSD

from collections import Counter
from collections.abc import Iterable
from copy import deepcopy
from functools import partial
from collections import Counter
from inspect import signature

import numpy as np

@@ -165,6 +167,38 @@ def _factory_args(self):
"""Return non-default make_scorer arguments for repr."""
return ""

def set_kwargs(self, **kwargs):
Review comment (Member, Author): @jnothman is it this type of interface that you had in mind?

"""Set the parameters which will be given to the scoring function.

Parameters
----------
**kwargs : dict
Additional scoring function parameters.

Returns
-------
self : object
Scorer instance.
"""
if not kwargs:
return self

signature_score_func = signature(self._score_func)
# only keyword-only parameters of the scoring function can be updated
params_score_func = {
name for name, param in signature_score_func.parameters.items()
if param.kind == param.KEYWORD_ONLY
}

unknown_params = set(kwargs) - params_score_func
if unknown_params:
raise ValueError(
f"Unknown parameters provided: {unknown_params}. The scoring "
f"function takes only the parameters {params_score_func}."
)

self._kwargs.update(kwargs)
return self


class _PredictScorer(_BaseScorer):
def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
@@ -328,8 +362,8 @@ def _factory_args(self):
return ", needs_threshold=True"


def get_scorer(scoring):
"""Get a scorer from string.
def get_scorer(scoring, **kwargs):
"""Get a scorer from string or a callable.

Read more in the :ref:`User Guide <scoring_parameter>`.

@@ -338,18 +372,69 @@ def get_scorer(scoring):
scoring : str or callable
Scoring method as string. If callable it is returned as is.

**kwargs : dict
Additional parameters that will be passed to the scoring function later
on, when calling the scorer.

Returns
-------
scorer : callable
The scorer.

Examples
--------
:func:`get_scorer` allows one to fetch a callable instance which can be used
to compute a score given an estimator, `X` and `y`. When a string is
provided, the corresponding scorer pre-built in scikit-learn is returned:

>>> from sklearn.metrics import get_scorer
>>> scorer = get_scorer("f1")

However, this scorer cannot be used on every type of target. For
instance, the F1 score cannot be computed on multiclass problems without
providing the `average` parameter; otherwise the result would be ambiguous.

>>> from sklearn.datasets import load_iris
>>> from sklearn.dummy import DummyClassifier
>>> X, y = load_iris(return_X_y=True)
>>> classifier = DummyClassifier().fit(X, y)
>>> try:
... scorer(classifier, X, y)
... except ValueError as e:
... print(e)
Target is multiclass but average='binary'. Please choose another average
setting, one of [None, 'micro', 'macro', 'weighted'].

In this case, there are several ways to overcome this problem.
First, you can use a string to get the fully qualified metric:

>>> scorer = get_scorer("f1_micro")
>>> scorer(classifier, X, y)
0.333...

Alternatively, one can set the additional parameters to pass to the scoring
function by setting the keyword arguments of the scorer.

>>> scorer = get_scorer("f1")
>>> _ = scorer.set_kwargs(average="micro")
>>> scorer(classifier, X, y)
0.333...

Finally, the additional parameters can be passed directly when calling
:func:`get_scorer`.

>>> scorer = get_scorer("f1", average="micro")
>>> scorer(classifier, X, y)
0.333...
"""
if isinstance(scoring, str):
try:
scorer = SCORERS[scoring]
scorer = deepcopy(SCORERS[scoring]).set_kwargs(**kwargs)
except KeyError:
raise ValueError('%r is not a valid scoring value. '
'Use sorted(sklearn.metrics.SCORERS.keys()) '
'to get valid options.' % scoring)
raise ValueError(
f"'{scoring}' is not a valid scoring value. Use "
f"sorted(sklearn.metrics.SCORERS.keys()) to get valid options."
)
else:
scorer = scoring
return scorer
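
Note on the `deepcopy` above: the built-in scorers stored in `SCORERS` are
shared module-level objects, so updating their `_kwargs` in place would leak
state between callers of :func:`get_scorer`. Returning a copy keeps each
fetched scorer independent; a small sketch of the behaviour this guarantees::

    s1 = get_scorer("f1", average="micro")
    s2 = get_scorer("f1", average="macro")
    # distinct objects with independent kwargs: configuring s2 does not
    # change the score computed by s1
    assert s1 is not s2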