FIX make sure _PassthroughScorer works with meta-estimators (scikit-learn#31898)

adrinjalali · StefanieSenger · web-flow · commit 285883c039a2 · 2025-08-31T19:47:57.000+05:00
Co-authored-by: Stefanie Senger &lt;91849487+StefanieSenger@users.noreply.github.com&gt;
diff --git a/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst b/doc/whats_new/upcoming_changes/metadata-routing/31898.fix.rst
@@ -0,0 +1,3 @@
+- Fixed an issue where passing `sample_weight` to a :class:`Pipeline` inside a
+  :class:`GridSearchCV` would raise an error with metadata routing enabled.
+  By `Adrin Jalali`_.
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
@@ -30,7 +30,7 @@
     _rescale_data,
 )
 from sklearn.linear_model._sag import sag_solver
-from sklearn.metrics import check_scoring, get_scorer_names
+from sklearn.metrics import check_scoring, get_scorer, get_scorer_names
 from sklearn.model_selection import GridSearchCV
 from sklearn.preprocessing import LabelBinarizer
 from sklearn.utils import (
@@ -1359,6 +1359,12 @@ def __sklearn_tags__(self):
         tags.classifier_tags.multi_label = True
         return tags
 
+    def _get_scorer_instance(self):
+        """Return a scorer which corresponds to what's defined in ClassiferMixin
+        parent class. This is used for routing `sample_weight`.
+        """
+        return get_scorer("accuracy")
+
 
 class RidgeClassifier(_RidgeClassifierMixin, _BaseRidge):
     """Classifier using Ridge regression.
@@ -2499,7 +2505,7 @@ def get_metadata_routing(self):
             MetadataRouter(owner=self.__class__.__name__)
             .add_self_request(self)
             .add(
-                scorer=self.scoring,
+                scorer=self._get_scorer(),
                 method_mapping=MethodMapping().add(caller="fit", callee="score"),
             )
             .add(
@@ -2510,14 +2516,20 @@ def get_metadata_routing(self):
         return router
 
     def _get_scorer(self):
-        scorer = check_scoring(estimator=self, scoring=self.scoring, allow_none=True)
+        """Make sure the scorer is weighted if necessary.
+
+        This uses `self._get_scorer_instance()` implemented in child objects to get the
+        raw scorer instance of the estimator, which will be ignored if `self.scoring` is
+        not None.
+        """
         if _routing_enabled() and self.scoring is None:
             # This estimator passes an array of 1s as sample_weight even if
             # sample_weight is not provided by the user. Therefore we need to
             # always request it. But we don't set it if it's passed explicitly
             # by the user.
-            scorer.set_score_request(sample_weight=True)
-        return scorer
+            return self._get_scorer_instance().set_score_request(sample_weight=True)
+
+        return check_scoring(estimator=self, scoring=self.scoring, allow_none=True)
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
@@ -2707,6 +2719,12 @@ def fit(self, X, y, sample_weight=None, **params):
         super().fit(X, y, sample_weight=sample_weight, **params)
         return self
 
+    def _get_scorer_instance(self):
+        """Return a scorer which corresponds to what's defined in RegressorMixin
+        parent class. This is used for routing `sample_weight`.
+        """
+        return get_scorer("r2")
+
 
 class RidgeClassifierCV(_RidgeClassifierMixin, _BaseRidgeCV):
     """Ridge classifier with built-in cross-validation.
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
@@ -489,17 +489,6 @@ class _PassthroughScorer(_MetadataRequester):
     def __init__(self, estimator):
         self._estimator = estimator
 
-        requests = MetadataRequest(owner=self.__class__.__name__)
-        try:
-            requests.score = copy.deepcopy(estimator._metadata_request.score)
-        except AttributeError:
-            try:
-                requests.score = copy.deepcopy(estimator._get_default_requests().score)
-            except AttributeError:
-                pass
-
-        self._metadata_request = requests
-
     def __call__(self, estimator, *args, **kwargs):
         """Method that wraps estimator.score"""
         return estimator.score(*args, **kwargs)
@@ -525,32 +514,7 @@ def get_metadata_routing(self):
             A :class:`~utils.metadata_routing.MetadataRouter` encapsulating
             routing information.
         """
-        return get_routing_for_object(self._metadata_request)
-
-    def set_score_request(self, **kwargs):
-        """Set requested parameters by the scorer.
-
-        Please see :ref:`User Guide <metadata_routing>` on how the routing
-        mechanism works.
-
-        .. versionadded:: 1.5
-
-        Parameters
-        ----------
-        kwargs : dict
-            Arguments should be of the form ``param_name=alias``, and `alias`
-            can be one of ``{True, False, None, str}``.
-        """
-        if not _routing_enabled():
-            raise RuntimeError(
-                "This method is only available when metadata routing is enabled."
-                " You can enable it using"
-                " sklearn.set_config(enable_metadata_routing=True)."
-            )
-
-        for param, alias in kwargs.items():
-            self._metadata_request.score.add_request(param=param, alias=alias)
-        return self
+        return get_routing_for_object(self._estimator)
 
 
 def _check_multimetric_scoring(estimator, scoring):
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
@@ -52,7 +52,7 @@
 from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.pipeline import make_pipeline
+from sklearn.pipeline import Pipeline, make_pipeline
 from sklearn.svm import LinearSVC
 from sklearn.tests.metadata_routing_common import (
     assert_request_is_empty,
@@ -1301,37 +1301,27 @@ def test_metadata_kwarg_conflict():
 
 @config_context(enable_metadata_routing=True)
 def test_PassthroughScorer_set_score_request():
-    """Test that _PassthroughScorer.set_score_request adds the correct metadata request
-    on itself and doesn't change its estimator's routing."""
+    """Test that _PassthroughScorer.set_score_request raises when routing enabled."""
     est = LogisticRegression().set_score_request(sample_weight="estimator_weights")
     # make a `_PassthroughScorer` with `check_scoring`:
     scorer = check_scoring(est, None)
-    assert (
-        scorer.get_metadata_routing().score.requests["sample_weight"]
-        == "estimator_weights"
-    )
-
-    scorer.set_score_request(sample_weight="scorer_weights")
-    assert (
-        scorer.get_metadata_routing().score.requests["sample_weight"]
-        == "scorer_weights"
-    )
-
-    # making sure changing the passthrough object doesn't affect the estimator.
-    assert (
-        est.get_metadata_routing().score.requests["sample_weight"]
-        == "estimator_weights"
-    )
+    with pytest.raises(
+        AttributeError,
+        match="'_PassthroughScorer' object has no attribute 'set_score_request'",
+    ):
+        scorer.set_score_request(sample_weight=True)
 
 
 def test_PassthroughScorer_set_score_request_raises_without_routing_enabled():
     """Test that _PassthroughScorer.set_score_request raises if metadata routing is
     disabled."""
     scorer = check_scoring(LogisticRegression(), None)
-    msg = "This method is only available when metadata routing is enabled."
 
-    with pytest.raises(RuntimeError, match=msg):
-        scorer.set_score_request(sample_weight="my_weights")
+    with pytest.raises(
+        AttributeError,
+        match="'_PassthroughScorer' object has no attribute 'set_score_request'",
+    ):
+        scorer.set_score_request(sample_weight=True)
 
 
 @config_context(enable_metadata_routing=True)
@@ -1673,3 +1663,26 @@ def test_make_scorer_reponse_method_default_warning():
     with warnings.catch_warnings():
         warnings.simplefilter("error", FutureWarning)
         make_scorer(accuracy_score)
+
+
+@config_context(enable_metadata_routing=True)
+def test_Pipeline_in_PassthroughScorer():
+    """Non-regression test for
+    https://github.com/scikit-learn/scikit-learn/issues/30937
+
+    Make sure pipeline inside a gridsearchcv works with sample_weight passed!
+    """
+    X, y = make_classification(10, 4)
+    sample_weight = np.ones_like(y)
+    pipe = Pipeline(
+        [
+            (
+                "logistic",
+                LogisticRegression()
+                .set_fit_request(sample_weight=True)
+                .set_score_request(sample_weight=True),
+            )
+        ]
+    )
+    search = GridSearchCV(pipe, {"logistic__C": [0.1, 1]}, n_jobs=1, cv=3)
+    search.fit(X, y, sample_weight=sample_weight)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	+- Fixed an issue where passing `sample_weight` to a :class:`Pipeline` inside a
	`2`	+ :class:`GridSearchCV` would raise an error with metadata routing enabled.
	`3`	+ By `Adrin Jalali`_.