Skip to content

TST enable running common tests on stacking and voting estimators #18045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions sklearn/ensemble/tests/test_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
from sklearn.svm import LinearSVC
from sklearn.svm import LinearSVR
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import scale
Expand All @@ -44,8 +42,6 @@
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_allclose_dense_sparse
from sklearn.utils._testing import ignore_warnings
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.estimator_checks import check_no_attributes_set_in_init

X_diabetes, y_diabetes = load_diabetes(return_X_y=True)
X_iris, y_iris = load_iris(return_X_y=True)
Expand Down Expand Up @@ -368,24 +364,6 @@ def test_stacking_randomness(estimator, X, y):
)


# These warnings are raised due to _BaseComposition
@pytest.mark.filterwarnings("ignore:TypeError occurred during set_params")
@pytest.mark.filterwarnings("ignore:Estimator's parameters changed after")
@pytest.mark.parametrize(
"estimator",
[StackingClassifier(
estimators=[('lr', LogisticRegression(random_state=0)),
('tree', DecisionTreeClassifier(random_state=0))]),
StackingRegressor(
estimators=[('lr', LinearRegression()),
('tree', DecisionTreeRegressor(random_state=0))])],
ids=['StackingClassifier', 'StackingRegressor']
)
def test_check_estimators_stacking_estimator(estimator):
check_estimator(estimator)
check_no_attributes_set_in_init(estimator.__class__.__name__, estimator)


def test_stacking_classifier_stratify_default():
# check that we stratify the classes for the default CV
clf = StackingClassifier(
Expand Down
19 changes: 0 additions & 19 deletions sklearn/ensemble/tests/test_voting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from sklearn.utils._testing import assert_almost_equal, assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raise_message
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.estimator_checks import check_no_attributes_set_in_init
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
Expand Down Expand Up @@ -490,23 +488,6 @@ def test_none_estimator_with_weights(X, y, voter):
assert y_pred.shape == y.shape


@pytest.mark.parametrize(
"estimator",
[VotingRegressor(
estimators=[('lr', LinearRegression()),
('tree', DecisionTreeRegressor(random_state=0))]),
VotingClassifier(
estimators=[('lr', LogisticRegression(random_state=0)),
('tree', DecisionTreeClassifier(random_state=0))])],
ids=['VotingRegressor', 'VotingClassifier']
)
def test_check_estimators_voting_estimator(estimator):
# FIXME: to be removed when meta-estimators can specified themselves
# their testing parameters (for required parameters).
check_estimator(estimator)
check_no_attributes_set_in_init(estimator.__class__.__name__, estimator)


@pytest.mark.parametrize(
"est",
[VotingRegressor(
Expand Down
22 changes: 19 additions & 3 deletions sklearn/utils/estimator_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
from ._testing import create_memmap_backed_data
from ._testing import raises
from . import is_scalar_nan

from ..discriminant_analysis import LinearDiscriminantAnalysis
from ..linear_model import LogisticRegression
from ..linear_model import Ridge

from ..base import (
Expand Down Expand Up @@ -343,10 +345,24 @@ def _construct_instance(Estimator):
estimator = Estimator(Ridge())
else:
estimator = Estimator(LinearDiscriminantAnalysis())
elif required_parameters in (['estimators'],):
# Heterogeneous ensemble classes (i.e. stacking, voting)
if issubclass(Estimator, RegressorMixin):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

except that just because the meta is a regressor, it doesn't mean it consists of regressors...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would say that it could be a reasonable default for the moment (we can add a meaningful comment).
Right now we are not testing every combination in common tests, even for other estimators, so it might be a start?

estimator = Estimator(estimators=[
("est1", Ridge(alpha=0.1)),
("est2", Ridge(alpha=1))
])
else:
estimator = Estimator(estimators=[
("est1", LogisticRegression(C=0.1)),
("est2", LogisticRegression(C=1))
])
else:
raise SkipTest("Can't instantiate estimator {} which requires "
"parameters {}".format(Estimator.__name__,
required_parameters))
msg = (f"Can't instantiate estimator {Estimator.__name__} "
f"parameters {required_parameters}")
# raise additional warning to be shown by pytest
warnings.warn(msg, SkipTestWarning)
raise SkipTest(msg)
else:
estimator = Estimator()
return estimator
Expand Down