diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 552f1d243dcc3..2c27cb5f548c2 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -750,8 +750,10 @@ def check_sample_weights_invariance(name, estimator_orig): estimator1 = clone(estimator_orig) estimator2 = clone(estimator_orig) + estimator3 = clone(estimator_orig) set_random_state(estimator1, random_state=0) set_random_state(estimator2, random_state=0) + set_random_state(estimator3, random_state=0) X = np.array([[1, 3], [1, 3], [1, 3], [1, 3], [2, 1], [2, 1], [2, 1], [2, 1], @@ -759,22 +761,40 @@ def check_sample_weights_invariance(name, estimator_orig): [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.dtype('float')) y = np.array([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2], dtype=np.dtype('int')) + + X2 = np.vstack([X, X]) + y2 = np.hstack([y, 3 - y]) y = _enforce_estimator_tags_y(estimator1, y) + y2 = _enforce_estimator_tags_y(estimator3, y2) + weights = np.ones(shape=len(y) * 2) + weights[len(y):] = 0 + X2, y2, weights = shuffle(X2, y2, weights, random_state=0) estimator1.fit(X, y=y, sample_weight=np.ones(shape=len(y))) estimator2.fit(X, y=y, sample_weight=None) + estimator3.fit(X2, y=y2, sample_weight=weights) - for method in ["predict", "transform"]: + for method in ["predict", "predict_proba", + "decision_function", "transform"]: if hasattr(estimator_orig, method): X_pred1 = getattr(estimator1, method)(X) X_pred2 = getattr(estimator2, method)(X) + X_pred3 = getattr(estimator3, method)(X) if sparse.issparse(X_pred1): X_pred1 = X_pred1.toarray() X_pred2 = X_pred2.toarray() + X_pred3 = X_pred3.toarray() assert_allclose(X_pred1, X_pred2, err_msg="For %s sample_weight=None is not" " equivalent to sample_weight=ones" % name) + assert_allclose( + X_pred1, X_pred3, + err_msg="For %s sample_weight is not" + " equivalent to removing samples" + % name) + + @ignore_warnings(category=(DeprecationWarning, FutureWarning, UserWarning))