TST refactor instance generation and parameter setting #29702

Merged
9 commits merged on Sep 4, 2024
2 changes: 1 addition & 1 deletion doc/sphinxext/allow_nan_estimators.py
@@ -4,8 +4,8 @@
from docutils.parsers.rst import Directive

from sklearn.utils import all_estimators
from sklearn.utils._test_common.instance_generator import _construct_instance
from sklearn.utils._testing import SkipTest
from sklearn.utils.estimator_checks import _construct_instance


class AllowNanEstimators(Directive):
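For reference, a minimal sketch of how the relocated `_construct_instance` is typically consumed by the directive above. Only the import path comes from this diff; the loop and the helper name `iter_allow_nan_estimators` are illustrative assumptions, not the directive's exact code.

```python
from sklearn.utils import all_estimators
from sklearn.utils._tags import _safe_tags
from sklearn.utils._test_common.instance_generator import _construct_instance
from sklearn.utils._testing import SkipTest


def iter_allow_nan_estimators():
    """Yield names of estimators whose default instance is tagged allow_nan."""
    for name, Estimator in all_estimators():
        try:
            est = _construct_instance(Estimator)
        except SkipTest:
            # Estimators that cannot be built with default parameters are skipped.
            continue
        if _safe_tags(est, key="allow_nan"):
            yield name
```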
2 changes: 1 addition & 1 deletion sklearn/decomposition/tests/test_pca.py
@@ -17,9 +17,9 @@
yield_namespace_device_dtype_combinations,
)
from sklearn.utils._array_api import device as array_device
from sklearn.utils._test_common.instance_generator import _get_check_estimator_ids
from sklearn.utils._testing import _array_api_for_tests, assert_allclose
from sklearn.utils.estimator_checks import (
_get_check_estimator_ids,
check_array_api_input_and_values,
)
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
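In this file (and the two below), `_get_check_estimator_ids` now comes from the new module in the array-API parametrizations. A minimal sketch (an assumption, not code from this PR) of how it serves as a pytest `ids` callback so estimator instances render as readable test identifiers:

```python
import numpy as np
import pytest

from sklearn.decomposition import PCA
from sklearn.utils._test_common.instance_generator import _get_check_estimator_ids


@pytest.mark.parametrize(
    "estimator",
    [
        PCA(n_components=2, svd_solver="full"),
        PCA(n_components=3, svd_solver="randomized", random_state=0),
    ],
    ids=_get_check_estimator_ids,
)
def test_pca_transform_shape(estimator):
    # Test ids read something like "PCA(n_components=2,svd_solver='full')".
    X = np.random.RandomState(0).normal(size=(20, 5))
    assert estimator.fit_transform(X).shape == (20, estimator.n_components)
```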
2 changes: 1 addition & 1 deletion sklearn/linear_model/tests/test_ridge.py
@@ -48,6 +48,7 @@
yield_namespace_device_dtype_combinations,
yield_namespaces,
)
from sklearn.utils._test_common.instance_generator import _get_check_estimator_ids
from sklearn.utils._testing import (
assert_allclose,
assert_almost_equal,
@@ -57,7 +58,6 @@
)
from sklearn.utils.estimator_checks import (
_array_api_for_tests,
_get_check_estimator_ids,
check_array_api_input_and_values,
)
from sklearn.utils.fixes import (
2 changes: 1 addition & 1 deletion sklearn/preprocessing/tests/test_data.py
@@ -40,6 +40,7 @@
from sklearn.utils._array_api import (
yield_namespace_device_dtype_combinations,
)
from sklearn.utils._test_common.instance_generator import _get_check_estimator_ids
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
@@ -51,7 +52,6 @@
skip_if_32bit,
)
from sklearn.utils.estimator_checks import (
_get_check_estimator_ids,
check_array_api_input_and_values,
)
from sklearn.utils.fixes import (
104 changes: 13 additions & 91 deletions sklearn/tests/test_common.py
@@ -11,7 +11,7 @@
import warnings
from functools import partial
from inspect import isgenerator, signature
from itertools import chain, product
from itertools import chain

import numpy as np
import pytest
@@ -26,32 +26,24 @@
MeanShift,
SpectralClustering,
)
from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA
from sklearn.exceptions import ConvergenceWarning, FitFailedWarning

# make it possible to discover experimental estimators when calling `all_estimators`
from sklearn.experimental import (
enable_halving_search_cv, # noqa
enable_iterative_imputer, # noqa
)
from sklearn.linear_model import LogisticRegression, Ridge

# make it possible to discover experimental estimators when calling `all_estimators`
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.model_selection import (
GridSearchCV,
HalvingGridSearchCV,
HalvingRandomSearchCV,
RandomizedSearchCV,
)
from sklearn.neighbors import (
KNeighborsClassifier,
KNeighborsRegressor,
LocalOutlierFactor,
RadiusNeighborsClassifier,
RadiusNeighborsRegressor,
)
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
FunctionTransformer,
MinMaxScaler,
@@ -61,15 +53,19 @@
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
from sklearn.utils import all_estimators
from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags
from sklearn.utils._test_common.instance_generator import (
_generate_column_transformer_instances,
_generate_pipeline,
_generate_search_cv_instances,
_get_check_estimator_ids,
_set_checking_parameters,
_tested_estimators,
)
from sklearn.utils._testing import (
SkipTest,
ignore_warnings,
set_random_state,
)
from sklearn.utils.estimator_checks import (
_construct_instance,
_get_check_estimator_ids,
_set_checking_parameters,
check_dataframe_column_names_consistency,
check_estimator,
check_get_feature_names_out_error,
@@ -137,26 +133,6 @@ def test_get_check_estimator_ids(val, expected):
assert _get_check_estimator_ids(val) == expected


def _tested_estimators(type_filter=None):
for name, Estimator in all_estimators(type_filter=type_filter):
try:
estimator = _construct_instance(Estimator)
except SkipTest:
continue

yield estimator


def _generate_pipeline():
for final_estimator in [Ridge(), LogisticRegression()]:
yield Pipeline(
steps=[
("scaler", StandardScaler()),
("final_estimator", final_estimator),
]
)


@parametrize_with_checks(list(chain(_tested_estimators(), _generate_pipeline())))
def test_estimators(estimator, check, request):
# Common tests for estimator instances
@@ -259,60 +235,6 @@ def test_class_support_removed():
parametrize_with_checks([LogisticRegression])


def _generate_column_transformer_instances():
yield ColumnTransformer(
transformers=[
("trans1", StandardScaler(), [0, 1]),
]
)


def _generate_search_cv_instances():
for SearchCV, (Estimator, param_grid) in product(
[
GridSearchCV,
HalvingGridSearchCV,
RandomizedSearchCV,
HalvingGridSearchCV,
],
[
(Ridge, {"alpha": [0.1, 1.0]}),
(LogisticRegression, {"C": [0.1, 1.0]}),
],
):
init_params = signature(SearchCV).parameters
extra_params = (
{"min_resources": "smallest"} if "min_resources" in init_params else {}
)
search_cv = SearchCV(
Estimator(), param_grid, cv=2, error_score="raise", **extra_params
)
set_random_state(search_cv)
yield search_cv

for SearchCV, (Estimator, param_grid) in product(
[
GridSearchCV,
HalvingGridSearchCV,
RandomizedSearchCV,
HalvingRandomSearchCV,
],
[
(Ridge, {"ridge__alpha": [0.1, 1.0]}),
(LogisticRegression, {"logisticregression__C": [0.1, 1.0]}),
],
):
init_params = signature(SearchCV).parameters
extra_params = (
{"min_resources": "smallest"} if "min_resources" in init_params else {}
)
search_cv = SearchCV(
make_pipeline(PCA(), Estimator()), param_grid, cv=2, **extra_params
).set_params(error_score="raise")
set_random_state(search_cv)
yield search_cv


@parametrize_with_checks(list(_generate_search_cv_instances()))
def test_search_cv(estimator, check, request):
# Common tests for SearchCV instances
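After the move, test_common.py imports these generators back from `sklearn.utils._test_common.instance_generator`, as the new import block above shows. A hedged sketch of how they are typically combined; the names `_fast_instances` and `test_estimators_sketch` are hypothetical, only the imports follow this diff.

```python
from itertools import chain

from sklearn.utils._test_common.instance_generator import (
    _generate_pipeline,
    _set_checking_parameters,
    _tested_estimators,
)
from sklearn.utils._testing import set_random_state
from sklearn.utils.estimator_checks import parametrize_with_checks


def _fast_instances():
    # Shrink expensive defaults (iterations, ensemble sizes, ...) and fix seeds
    # so the common checks stay fast and deterministic.
    for estimator in chain(_tested_estimators(), _generate_pipeline()):
        _set_checking_parameters(estimator)
        set_random_state(estimator)
        yield estimator


@parametrize_with_checks(list(_fast_instances()))
def test_estimators_sketch(estimator, check, request):
    check(estimator)
```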
2 changes: 1 addition & 1 deletion sklearn/tests/test_docstring_parameters.py
@@ -22,14 +22,14 @@
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import FunctionTransformer
from sklearn.utils import all_estimators
from sklearn.utils._test_common.instance_generator import _construct_instance
from sklearn.utils._testing import (
_get_func_name,
check_docstring_parameters,
ignore_warnings,
)
from sklearn.utils.deprecation import _is_deprecated
from sklearn.utils.estimator_checks import (
_construct_instance,
_enforce_estimator_tags_X,
_enforce_estimator_tags_y,
)
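In test_docstring_parameters.py, the relocated `_construct_instance` is combined with the tag-enforcement helpers that remain in `estimator_checks`. A small sketch under that assumption; the toy data and the choice of `LogisticRegression` are illustrative only.

```python
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.utils._test_common.instance_generator import _construct_instance
from sklearn.utils.estimator_checks import (
    _enforce_estimator_tags_X,
    _enforce_estimator_tags_y,
)

est = _construct_instance(LogisticRegression)
rng = np.random.RandomState(0)
X, y = rng.uniform(size=(30, 4)), rng.randint(0, 3, size=30)
# Adapt the toy data to the estimator's tags (e.g. positive-only X, y dtype).
X = _enforce_estimator_tags_X(est, X)
y = _enforce_estimator_tags_y(est, y)
est.fit(X, y)
```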
2 changes: 2 additions & 0 deletions sklearn/utils/_test_common/__init__.py
@@ -0,0 +1,2 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
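The diff for the new `sklearn/utils/_test_common/instance_generator.py` is not shown above. As a rough skeleton only, here is what its shape would look like if the helpers deleted from `sklearn/tests/test_common.py` land there essentially unchanged; the `_construct_instance` body below is a reduced stand-in, not the real implementation.

```python
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import all_estimators
from sklearn.utils._testing import SkipTest


def _construct_instance(Estimator):
    """Build an estimator with default parameters (reduced stand-in).

    The real helper also special-cases meta-estimators that require a base
    estimator; here any class with required parameters is simply skipped.
    """
    if getattr(Estimator, "_required_parameters", []):
        raise SkipTest(f"Can't instantiate {Estimator.__name__} with defaults")
    return Estimator()


def _tested_estimators(type_filter=None):
    for name, Estimator in all_estimators(type_filter=type_filter):
        try:
            estimator = _construct_instance(Estimator)
        except SkipTest:
            continue
        yield estimator


def _generate_pipeline():
    for final_estimator in [Ridge(), LogisticRegression()]:
        yield Pipeline(
            steps=[
                ("scaler", StandardScaler()),
                ("final_estimator", final_estimator),
            ]
        )
```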