diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 7b31ee226664c..223f3253b406d 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -4,6 +4,8 @@ # License: BSD 3 clause import warnings +from numbers import Integral, Real + import numpy as np import scipy.sparse as sp @@ -11,6 +13,7 @@ from .base import MultiOutputMixin from .utils import check_random_state from .utils import deprecated +from .utils._param_validation import StrOptions, Interval from .utils.validation import _num_samples from .utils.validation import check_array from .utils.validation import check_consistent_length @@ -134,6 +137,14 @@ class prior probabilities. 0.75 """ + _parameter_constraints = { + "strategy": [ + StrOptions({"most_frequent", "prior", "stratified", "uniform", "constant"}) + ], + "random_state": ["random_state"], + "constant": [Integral, str, "array-like", None], + } + def __init__(self, *, strategy="prior", random_state=None, constant=None): self.strategy = strategy self.random_state = random_state @@ -158,19 +169,7 @@ def fit(self, X, y, sample_weight=None): self : object Returns the instance itself. """ - allowed_strategies = ( - "most_frequent", - "stratified", - "uniform", - "constant", - "prior", - ) - - if self.strategy not in allowed_strategies: - raise ValueError( - "Unknown strategy type: %s, expected one of %s." - % (self.strategy, allowed_strategies) - ) + self._validate_params() self._strategy = self.strategy @@ -527,6 +526,16 @@ class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): 0.0 """ + _parameter_constraints = { + "strategy": [StrOptions({"mean", "median", "quantile", "constant"})], + "quantile": [Interval(Real, 0.0, 1.0, closed="both"), None], + "constant": [ + Interval(Real, None, None, closed="neither"), + "array-like", + None, + ], + } + def __init__(self, *, strategy="mean", constant=None, quantile=None): self.strategy = strategy self.constant = constant @@ -551,12 +560,7 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. """ - allowed_strategies = ("mean", "median", "quantile", "constant") - if self.strategy not in allowed_strategies: - raise ValueError( - "Unknown strategy type: %s, expected one of %s." - % (self.strategy, allowed_strategies) - ) + self._validate_params() y = check_array(y, ensure_2d=False, input_name="y") if len(y) == 0: @@ -584,12 +588,11 @@ def fit(self, X, y, sample_weight=None): ] elif self.strategy == "quantile": - if self.quantile is None or not np.isscalar(self.quantile): + if self.quantile is None: raise ValueError( - "Quantile must be a scalar in the range [0.0, 1.0], but got %s." - % self.quantile + "When using `strategy='quantile', you have to specify the desired " + "quantile in the range [0, 1]." ) - percentile = self.quantile * 100.0 if sample_weight is None: self.constant_ = np.percentile(y, axis=0, q=percentile) diff --git a/sklearn/model_selection/tests/test_successive_halving.py b/sklearn/model_selection/tests/test_successive_halving.py index fe06957f5deed..a9cdeabed35f1 100644 --- a/sklearn/model_selection/tests/test_successive_halving.py +++ b/sklearn/model_selection/tests/test_successive_halving.py @@ -29,6 +29,15 @@ class FastClassifier(DummyClassifier): These parameter don't affect the predictions and are useful for fast grid searching.""" + # update the constraints such that we accept all parameters from a to z + _parameter_constraints = { + **DummyClassifier._parameter_constraints, + **{ + chr(key): "no_validation" # type: ignore + for key in range(ord("a"), ord("z") + 1) + }, + } + def __init__( self, strategy="stratified", random_state=None, constant=None, **kwargs ): diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 6523a5d4d206f..c4079d9e68eef 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -468,8 +468,6 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator): "DBSCAN", "DictVectorizer", "DictionaryLearning", - "DummyClassifier", - "DummyRegressor", "ElasticNet", "ElasticNetCV", "EllipticEnvelope", diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 61f8c2e4190e1..af5b539defb60 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -231,17 +231,6 @@ def test_classifier_prediction_independent_of_X(strategy): assert_array_equal(predictions1, predictions2) -def test_classifier_exceptions(): - clf = DummyClassifier(strategy="unknown") - with pytest.raises(ValueError): - clf.fit([], []) - - with pytest.raises(NotFittedError): - clf.predict([]) - with pytest.raises(NotFittedError): - clf.predict_proba([]) - - def test_mean_strategy_regressor(): random_state = np.random.RandomState(seed=1) @@ -379,28 +368,11 @@ def test_quantile_invalid(): X = [[0]] * 5 # ignored y = [0] * 5 # ignored - est = DummyRegressor(strategy="quantile") - with pytest.raises(ValueError): - est.fit(X, y) - est = DummyRegressor(strategy="quantile", quantile=None) - with pytest.raises(ValueError): - est.fit(X, y) - - est = DummyRegressor(strategy="quantile", quantile=[0]) - with pytest.raises(ValueError): - est.fit(X, y) - - est = DummyRegressor(strategy="quantile", quantile=-0.1) - with pytest.raises(ValueError): - est.fit(X, y) - - est = DummyRegressor(strategy="quantile", quantile=1.1) - with pytest.raises(ValueError): - est.fit(X, y) - - est = DummyRegressor(strategy="quantile", quantile="abc") - with pytest.raises(TypeError): + err_msg = ( + "When using `strategy='quantile', you have to specify the desired quantile" + ) + with pytest.raises(ValueError, match=err_msg): est.fit(X, y) @@ -462,21 +434,13 @@ def test_y_mean_attribute_regressor(): assert est.constant_ == np.mean(y) -def test_unknown_strategey_regressor(): - X = [[0]] * 5 - y = [1, 2, 4, 6, 8] - - est = DummyRegressor(strategy="gona") - with pytest.raises(ValueError): - est.fit(X, y) - - def test_constants_not_specified_regressor(): X = [[0]] * 5 y = [1, 2, 4, 6, 8] est = DummyRegressor(strategy="constant") - with pytest.raises(TypeError): + err_msg = "Constant target value has to be specified" + with pytest.raises(TypeError, match=err_msg): est.fit(X, y) @@ -486,7 +450,8 @@ def test_constant_size_multioutput_regressor(): y = random_state.randn(10, 5) est = DummyRegressor(strategy="constant", constant=[1, 2, 3, 4]) - with pytest.raises(ValueError): + err_msg = r"Constant target value should have shape \(5, 1\)." + with pytest.raises(ValueError, match=err_msg): est.fit(X, y) @@ -554,7 +519,6 @@ def test_constant_strategy_exceptions(y, params, err_msg): X = [[0], [0], [0], [0]] clf = DummyClassifier(strategy="constant", **params) - with pytest.raises(ValueError, match=err_msg): clf.fit(X, y)