Skip to content

MAINT validate parameters in DummyRegressor and DummyClassifier #23645

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 28, 2022
Merged
49 changes: 26 additions & 23 deletions sklearn/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
# License: BSD 3 clause

import warnings
from numbers import Integral, Real

import numpy as np
import scipy.sparse as sp

from .base import BaseEstimator, ClassifierMixin, RegressorMixin
from .base import MultiOutputMixin
from .utils import check_random_state
from .utils import deprecated
from .utils._param_validation import StrOptions, Interval
from .utils.validation import _num_samples
from .utils.validation import check_array
from .utils.validation import check_consistent_length
Expand Down Expand Up @@ -134,6 +137,14 @@ class prior probabilities.
0.75
"""

_parameter_constraints = {
"strategy": [
StrOptions({"most_frequent", "prior", "stratified", "uniform", "constant"})
],
"random_state": ["random_state"],
"constant": [Integral, str, "array-like", None],
}

def __init__(self, *, strategy="prior", random_state=None, constant=None):
self.strategy = strategy
self.random_state = random_state
Expand All @@ -158,19 +169,7 @@ def fit(self, X, y, sample_weight=None):
self : object
Returns the instance itself.
"""
allowed_strategies = (
"most_frequent",
"stratified",
"uniform",
"constant",
"prior",
)

if self.strategy not in allowed_strategies:
raise ValueError(
"Unknown strategy type: %s, expected one of %s."
% (self.strategy, allowed_strategies)
)
self._validate_params()

self._strategy = self.strategy

Expand Down Expand Up @@ -527,6 +526,16 @@ class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
0.0
"""

_parameter_constraints = {
"strategy": [StrOptions({"mean", "median", "quantile", "constant"})],
"quantile": [Interval(Real, 0.0, 1.0, closed="both"), None],
"constant": [
Interval(Real, None, None, closed="neither"),
"array-like",
None,
],
}

def __init__(self, *, strategy="mean", constant=None, quantile=None):
self.strategy = strategy
self.constant = constant
Expand All @@ -551,12 +560,7 @@ def fit(self, X, y, sample_weight=None):
self : object
Fitted estimator.
"""
allowed_strategies = ("mean", "median", "quantile", "constant")
if self.strategy not in allowed_strategies:
raise ValueError(
"Unknown strategy type: %s, expected one of %s."
% (self.strategy, allowed_strategies)
)
self._validate_params()

y = check_array(y, ensure_2d=False, input_name="y")
if len(y) == 0:
Expand Down Expand Up @@ -584,12 +588,11 @@ def fit(self, X, y, sample_weight=None):
]

elif self.strategy == "quantile":
if self.quantile is None or not np.isscalar(self.quantile):
if self.quantile is None:
raise ValueError(
"Quantile must be a scalar in the range [0.0, 1.0], but got %s."
% self.quantile
"When using `strategy='quantile', you have to specify the desired "
"quantile in the range [0, 1]."
)

percentile = self.quantile * 100.0
if sample_weight is None:
self.constant_ = np.percentile(y, axis=0, q=percentile)
Expand Down
9 changes: 9 additions & 0 deletions sklearn/model_selection/tests/test_successive_halving.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ class FastClassifier(DummyClassifier):
These parameters don't affect the predictions and are useful for fast
grid searching."""

# update the constraints such that we accept all parameters from a to z
_parameter_constraints = {
**DummyClassifier._parameter_constraints,
**{
chr(key): "no_validation" # type: ignore
for key in range(ord("a"), ord("z") + 1)
},
}

def __init__(
self, strategy="stratified", random_state=None, constant=None, **kwargs
):
Expand Down
2 changes: 0 additions & 2 deletions sklearn/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,8 +468,6 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator):
"DBSCAN",
"DictVectorizer",
"DictionaryLearning",
"DummyClassifier",
"DummyRegressor",
"ElasticNet",
"ElasticNetCV",
"EllipticEnvelope",
Expand Down
52 changes: 8 additions & 44 deletions sklearn/tests/test_dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,17 +231,6 @@ def test_classifier_prediction_independent_of_X(strategy):
assert_array_equal(predictions1, predictions2)


def test_classifier_exceptions():
clf = DummyClassifier(strategy="unknown")
with pytest.raises(ValueError):
clf.fit([], [])

with pytest.raises(NotFittedError):
clf.predict([])
with pytest.raises(NotFittedError):
clf.predict_proba([])


def test_mean_strategy_regressor():

random_state = np.random.RandomState(seed=1)
Expand Down Expand Up @@ -379,28 +368,11 @@ def test_quantile_invalid():
X = [[0]] * 5 # ignored
y = [0] * 5 # ignored

est = DummyRegressor(strategy="quantile")
with pytest.raises(ValueError):
est.fit(X, y)

est = DummyRegressor(strategy="quantile", quantile=None)
with pytest.raises(ValueError):
est.fit(X, y)

est = DummyRegressor(strategy="quantile", quantile=[0])
with pytest.raises(ValueError):
est.fit(X, y)

est = DummyRegressor(strategy="quantile", quantile=-0.1)
with pytest.raises(ValueError):
est.fit(X, y)

est = DummyRegressor(strategy="quantile", quantile=1.1)
with pytest.raises(ValueError):
est.fit(X, y)

est = DummyRegressor(strategy="quantile", quantile="abc")
with pytest.raises(TypeError):
err_msg = (
"When using `strategy='quantile', you have to specify the desired quantile"
)
with pytest.raises(ValueError, match=err_msg):
est.fit(X, y)


Expand Down Expand Up @@ -462,21 +434,13 @@ def test_y_mean_attribute_regressor():
assert est.constant_ == np.mean(y)


def test_unknown_strategey_regressor():
X = [[0]] * 5
y = [1, 2, 4, 6, 8]

est = DummyRegressor(strategy="gona")
with pytest.raises(ValueError):
est.fit(X, y)


def test_constants_not_specified_regressor():
X = [[0]] * 5
y = [1, 2, 4, 6, 8]

est = DummyRegressor(strategy="constant")
with pytest.raises(TypeError):
err_msg = "Constant target value has to be specified"
with pytest.raises(TypeError, match=err_msg):
est.fit(X, y)


Expand All @@ -486,7 +450,8 @@ def test_constant_size_multioutput_regressor():
y = random_state.randn(10, 5)

est = DummyRegressor(strategy="constant", constant=[1, 2, 3, 4])
with pytest.raises(ValueError):
err_msg = r"Constant target value should have shape \(5, 1\)."
with pytest.raises(ValueError, match=err_msg):
est.fit(X, y)


Expand Down Expand Up @@ -554,7 +519,6 @@ def test_constant_strategy_exceptions(y, params, err_msg):
X = [[0], [0], [0], [0]]

clf = DummyClassifier(strategy="constant", **params)

with pytest.raises(ValueError, match=err_msg):
clf.fit(X, y)

Expand Down