[WIP] Add a supports_sample_weight tag #13565

Closed
3 changes: 2 additions & 1 deletion sklearn/base.py
@@ -25,7 +25,8 @@
'stateless': False,
'multilabel': False,
'_skip_test': False,
-    'multioutput_only': False}
+    'multioutput_only': False,
+    'supports_sample_weight': False}


def clone(estimator, safe=True):
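For context, a minimal sketch of how the new default would surface through the existing BaseEstimator._get_tags() machinery, which merges _DEFAULT_TAGS with each class's _more_tags() overrides (estimator choices are illustrative and assume only the changes shown in this diff):

    from sklearn.linear_model import LinearRegression
    from sklearn.neighbors import KNeighborsRegressor

    # LinearRegression gains a _more_tags() override further down in this
    # diff, so its tag reads True; KNeighborsRegressor is untouched here and
    # keeps the new False default from _DEFAULT_TAGS.
    print(LinearRegression()._get_tags()['supports_sample_weight'])     # True
    print(KNeighborsRegressor()._get_tags()['supports_sample_weight'])  # False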
11 changes: 11 additions & 0 deletions sklearn/calibration.py
@@ -199,6 +199,17 @@ def fit(self, X, y, sample_weight=None):

return self

+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use LinearSVC
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}

def predict_proba(self, X):
"""Posterior probabilities of classification

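The pattern above — hard-code True for the LinearSVC default, otherwise mirror the wrapped estimator's tag — is the delegation scheme reused by the other meta-estimators in this diff. A hedged sketch of the resulting behaviour (estimator choices are illustrative):

    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier

    # The meta-estimator reports whatever its base_estimator reports.
    print(CalibratedClassifierCV(LogisticRegression())
          ._get_tags()['supports_sample_weight'])   # True (tag set in this diff)
    print(CalibratedClassifierCV(KNeighborsClassifier())
          ._get_tags()['supports_sample_weight'])   # False (default)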
3 changes: 3 additions & 0 deletions sklearn/cluster/dbscan_.py
@@ -382,3 +382,6 @@ def fit_predict(self, X, y=None, sample_weight=None):
"""
self.fit(X, sample_weight=sample_weight)
return self.labels_

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
2 changes: 2 additions & 0 deletions sklearn/cluster/k_means_.py
@@ -1103,6 +1103,8 @@ def score(self, X, y=None, sample_weight=None):
return -_labels_inertia(X, sample_weight, x_squared_norms,
self.cluster_centers_)[1]

+    def _more_tags(self):
+        return {'supports_sample_weight': True}

def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
old_center_buffer, compute_squared_diff,
12 changes: 11 additions & 1 deletion sklearn/compose/_target.py
@@ -235,4 +235,14 @@ def predict(self, X):
return pred_trans

def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        if self.regressor is None:
+            # regressor can be None in which case we use LinearRegression
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.regressor._get_tags()['supports_sample_weight'])
+
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': supports_sample_weight}

6 changes: 4 additions & 2 deletions sklearn/dummy.py
@@ -318,7 +318,8 @@ def predict_log_proba(self, X):
return [np.log(p) for p in proba]

def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': True}

def score(self, X, y, sample_weight=None):
"""Returns the mean accuracy on the given test data and labels.
@@ -510,7 +511,8 @@ def predict(self, X, return_std=False):
return (y, y_std) if return_std else y

def _more_tags(self):
-        return {'poor_score': True, 'no_validation': True}
+        return {'poor_score': True, 'no_validation': True,
+                'supports_sample_weight': True}

def score(self, X, y, sample_weight=None):
"""Returns the coefficient of determination R^2 of the prediction.
16 changes: 13 additions & 3 deletions sklearn/ensemble/bagging.py
@@ -20,7 +20,7 @@
from ..utils.metaestimators import if_delegate_has_method
from ..utils.multiclass import check_classification_targets
from ..utils.random import sample_without_replacement
-from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import check_is_fitted


__all__ = ["BaggingClassifier",
@@ -66,8 +66,7 @@ def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight,
max_samples = ensemble._max_samples
bootstrap = ensemble.bootstrap
bootstrap_features = ensemble.bootstrap_features
-    support_sample_weight = has_fit_parameter(ensemble.base_estimator_,
-                                              "sample_weight")
+    support_sample_weight = ensemble._get_tags()['supports_sample_weight']
if not support_sample_weight and sample_weight is not None:
raise ValueError("The base estimator doesn't support sample weight")

@@ -427,6 +426,17 @@ def estimators_samples_(self):
return [sample_indices
for _, sample_indices in self._get_estimators_indices()]

+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use a decision tree,
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}


class BaggingClassifier(BaseBagging, ClassifierMixin):
"""A Bagging classifier.
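Note that BaseBagging._more_tags() deliberately shadows the unconditional True added to BaseEnsemble in the next file: _get_tags() walks the MRO, so the most derived class wins. With the tag in place, passing sample_weight alongside an unsupporting base estimator should fail fast; a hypothetical illustration, assuming KNeighborsClassifier keeps the False default:

    import numpy as np
    from sklearn.ensemble import BaggingClassifier
    from sklearn.neighbors import KNeighborsClassifier

    X = np.arange(20).reshape(10, 2)
    y = np.array([0, 1] * 5)

    # KNeighborsClassifier does not declare the tag, so the check raises.
    bag = BaggingClassifier(base_estimator=KNeighborsClassifier(n_neighbors=3))
    try:
        bag.fit(X, y, sample_weight=np.ones(10))
    except ValueError as e:
        print(e)  # The base estimator doesn't support sample weight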
3 changes: 3 additions & 0 deletions sklearn/ensemble/base.py
@@ -147,6 +147,9 @@ def __iter__(self):
"""Returns iterator over estimators in the ensemble."""
return iter(self.estimators_)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


def _partition_estimators(n_estimators, n_jobs):
"""Private function used to partition estimators between jobs."""
16 changes: 5 additions & 11 deletions sklearn/ensemble/gradient_boosting.py
@@ -1478,21 +1478,15 @@ def fit(self, X, y, sample_weight=None, monitor=None):
raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),
dtype=np.float64)
else:
-            # XXX clean this once we have a support_sample_weight tag
             if sample_weight_is_none:
                 self.init_.fit(X, y)
             else:
-                msg = ("The initial estimator {} does not support sample "
-                       "weights.".format(self.init_.__class__.__name__))
-                try:
-                    self.init_.fit(X, y, sample_weight=sample_weight)
-                except TypeError:  # regular estimator without SW support
+                if not self.init_._get_tags()['supports_sample_weight']:
+                    msg = ("The initial estimator {} does not "
+                           "support sample weights."
+                           .format(self.init_.__class__.__name__))
                     raise ValueError(msg)
-                except ValueError as e:
-                    if 'not enough values to unpack' in str(e):  # pipeline
-                        raise ValueError(msg) from e
-                    else:  # regular estimator whose input checking failed
-                        raise
+                self.init_.fit(X, y, sample_weight=sample_weight)

raw_predictions = \
self.loss_.get_init_raw_predictions(X, self.init_)
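The net effect of the rewrite: an unsupported init estimator is now rejected up front by the tag check instead of by catching TypeError/ValueError around init_.fit, which is what makes the pipeline special-casing (and the test removed below) unnecessary. A sketch, again assuming KNeighborsRegressor keeps the False default:

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.neighbors import KNeighborsRegressor

    rng = np.random.RandomState(0)
    X, y = rng.rand(30, 3), rng.rand(30)

    gb = GradientBoostingRegressor(init=KNeighborsRegressor())
    try:
        gb.fit(X, y, sample_weight=np.ones(30))
    except ValueError as e:
        print(e)  # The initial estimator KNeighborsRegressor does not
                  # support sample weights.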
12 changes: 0 additions & 12 deletions sklearn/ensemble/tests/test_gradient_boosting.py
@@ -1395,18 +1395,6 @@ def test_gradient_boosting_with_init_pipeline():
'weights'):
gb.fit(X, y, sample_weight=np.ones(X.shape[0]))

-    # Passing sample_weight to a pipeline raises a ValueError. This test makes
-    # sure we make the distinction between ValueError raised by a pipeline that
-    # was passed sample_weight, and a ValueError raised by a regular estimator
-    # whose input checking failed.
-    with pytest.raises(
-            ValueError,
-            match='nu <= 0 or nu > 1'):
-        # Note that NuSVR properly supports sample_weight
-        init = NuSVR(gamma='auto', nu=1.5)
-        gb = GradientBoostingRegressor(init=init)
-        gb.fit(X, y, sample_weight=np.ones(X.shape[0]))


@pytest.mark.parametrize('estimator, missing_method', [
(GradientBoostingClassifier(init=LinearSVC()), 'predict_proba'),
17 changes: 13 additions & 4 deletions sklearn/ensemble/voting_classifier.py
@@ -18,7 +18,7 @@
from ..base import clone
from ..preprocessing import LabelEncoder
from ..utils._joblib import Parallel, delayed
-from ..utils.validation import has_fit_parameter, check_is_fitted
+from ..utils.validation import check_is_fitted
from ..utils.metaestimators import _BaseComposition
from ..utils import Bunch

@@ -176,10 +176,11 @@ def fit(self, X, y, sample_weight=None):
% (len(self.weights), len(self.estimators)))

if sample_weight is not None:
-            for name, step in self.estimators:
-                if not has_fit_parameter(step, 'sample_weight'):
+            for _, est in self.estimators:
+                if not est._get_tags()['supports_sample_weight']:
                    raise ValueError('Underlying estimator \'%s\' does not'
-                                     ' support sample weights.' % name)
+                                     ' support sample weights.' %
+                                     est.__class__.__name__)
names, clfs = zip(*self.estimators)
self._validate_names(names)

@@ -343,3 +344,11 @@ def get_params(self, deep=True):
def _predict(self, X):
"""Collect results from clf.predict calls. """
return np.asarray([clf.predict(X) for clf in self.estimators_]).T

+    def _more_tags(self):
+        supports_sample_weight = all(
+            est._get_tags()['supports_sample_weight']
+            for _, est in self.estimators
+        )
+
+        return {'supports_sample_weight': supports_sample_weight}
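A hedged sketch of the all() aggregation above: the ensemble advertises sample_weight support only when every named estimator does (estimator choices are illustrative):

    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier

    both = VotingClassifier([('lr1', LogisticRegression()),
                             ('lr2', LogisticRegression(C=0.1))])
    mixed = VotingClassifier([('lr', LogisticRegression()),
                              ('knn', KNeighborsClassifier())])

    print(both._get_tags()['supports_sample_weight'])   # True
    print(mixed._get_tags()['supports_sample_weight'])  # False: KNN keeps the default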
3 changes: 1 addition & 2 deletions sklearn/ensemble/weight_boosting.py
@@ -37,7 +37,6 @@
from ..utils.extmath import stable_cumsum
from ..metrics import accuracy_score, r2_score
from ..utils.validation import check_is_fitted
-from ..utils.validation import has_fit_parameter
from ..utils.validation import _num_samples

__all__ = [
@@ -440,7 +439,7 @@ def _validate_estimator(self):
"probabilities with a predict_proba method.\n"
"Please change the base estimator or set "
"algorithm='SAMME' instead.")
-        if not has_fit_parameter(self.base_estimator_, "sample_weight"):
+        if not self.base_estimator_._get_tags()['supports_sample_weight']:
raise ValueError("%s doesn't support sample_weight."
% self.base_estimator_.__class__.__name__)

3 changes: 2 additions & 1 deletion sklearn/isotonic.py
@@ -407,4 +407,5 @@ def __setstate__(self, state):
self._build_f(self._necessary_X_, self._necessary_y_)

def _more_tags(self):
-        return {'X_types': ['1darray']}
+        return {'X_types': ['1darray'],
+                'supports_sample_weight': True}
3 changes: 3 additions & 0 deletions sklearn/kernel_ridge.py
@@ -192,3 +192,6 @@ def predict(self, X):
check_is_fitted(self, ["X_fit_", "dual_coef_"])
K = self._get_kernel(X, self.X_fit_)
return np.dot(K, self.dual_coef_)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
3 changes: 3 additions & 0 deletions sklearn/linear_model/base.py
@@ -508,6 +508,9 @@ def rmatvec(b):
self._set_intercept(X_offset, y_offset, X_scale)
return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
check_input=True):
3 changes: 3 additions & 0 deletions sklearn/linear_model/bayes.py
@@ -359,6 +359,9 @@ def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals,

return score

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


###############################################################################
# ARD (Automatic Relevance Determination) regression
3 changes: 3 additions & 0 deletions sklearn/linear_model/huber.py
@@ -305,3 +305,6 @@ def fit(self, X, y, sample_weight=None):
y - safe_sparse_dot(X, self.coef_) - self.intercept_)
self.outliers_ = residual > self.scale_ * self.epsilon
return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
6 changes: 6 additions & 0 deletions sklearn/linear_model/logistic.py
@@ -1680,6 +1680,9 @@ def predict_log_proba(self, X):
"""
return np.log(self.predict_proba(X))

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class LogisticRegressionCV(LogisticRegression, BaseEstimator,
LinearClassifierMixin):
@@ -2260,3 +2263,6 @@ def score(self, X, y, sample_weight=None):
scoring = get_scorer(scoring)

return scoring(self, X, y, sample_weight=sample_weight)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
3 changes: 3 additions & 0 deletions sklearn/linear_model/perceptron.py
@@ -153,3 +153,6 @@ def __init__(self, penalty=None, alpha=0.0001, fit_intercept=True,
validation_fraction=validation_fraction,
n_iter_no_change=n_iter_no_change, power_t=0.5,
warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}
18 changes: 13 additions & 5 deletions sklearn/linear_model/ransac.py
@@ -13,7 +13,6 @@
from ..utils.random import sample_without_replacement
from ..utils.validation import check_is_fitted
from .base import LinearRegression
-from ..utils.validation import has_fit_parameter
from ..exceptions import ConvergenceWarning

_EPSILON = np.spacing(1)
@@ -316,11 +315,9 @@ def fit(self, X, y, sample_weight=None):
except ValueError:
pass

-        estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,
-                                                            "sample_weight")
+        supports_sample_weight = self._get_tags()['supports_sample_weight']
         estimator_name = type(base_estimator).__name__
-        if (sample_weight is not None and not
-                estimator_fit_has_sample_weight):
+        if sample_weight is not None and not supports_sample_weight:
raise ValueError("%s does not support sample_weight. Samples"
" weights are only used for the calibration"
" itself." % estimator_name)
@@ -492,3 +489,14 @@ def score(self, X, y):
check_is_fitted(self, 'estimator_')

return self.estimator_.score(X, y)

+    def _more_tags(self):
+        if self.base_estimator is None:
+            # base_estimator can be None in which case we use LinearRegression
+            # which accepts sample_weight
+            supports_sample_weight = True
+        else:
+            supports_sample_weight = (
+                self.base_estimator._get_tags()['supports_sample_weight'])
+
+        return {'supports_sample_weight': supports_sample_weight}
6 changes: 6 additions & 0 deletions sklearn/linear_model/ridge.py
@@ -572,6 +572,9 @@ def fit(self, X, y, sample_weight=None):

return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class Ridge(_BaseRidge, RegressorMixin):
"""Linear least squares with l2 regularization.
@@ -1223,6 +1226,9 @@ def fit(self, X, y, sample_weight=None):

return self

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class RidgeCV(_BaseRidgeCV, RegressorMixin):
"""Ridge regression with built-in cross-validation.
6 changes: 6 additions & 0 deletions sklearn/linear_model/stochastic_gradient.py
@@ -1048,6 +1048,9 @@ def predict_log_proba(self):
def _predict_log_proba(self, X):
return np.log(self.predict_proba(X))

+    def _more_tags(self):
+        return {'supports_sample_weight': True}


class BaseSGDRegressor(BaseSGD, RegressorMixin):

@@ -1526,3 +1529,6 @@ def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
validation_fraction=validation_fraction,
n_iter_no_change=n_iter_no_change, warm_start=warm_start,
average=average)

+    def _more_tags(self):
+        return {'supports_sample_weight': True}