Skip to content

Commit a63a827

Browse files
Gandagorn and glemaitre authored
MAINT Use _validate_params in Power and Quantile Transformer (#23672)
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
1 parent a00441f commit a63a827

File tree

3 files changed

+25
-83
lines changed

3 files changed

+25
-83
lines changed

sklearn/preprocessing/_data.py

Lines changed: 24 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010

1111
import warnings
12-
from numbers import Real
12+
from numbers import Integral, Real
1313

1414
import numpy as np
1515
from scipy import sparse
@@ -24,7 +24,7 @@
2424
_ClassNamePrefixFeaturesOutMixin,
2525
)
2626
from ..utils import check_array
27-
from ..utils._param_validation import StrOptions
27+
from ..utils._param_validation import Interval, StrOptions
2828
from ..utils.extmath import _incremental_mean_and_var, row_norms
2929
from ..utils.sparsefuncs_fast import (
3030
inplace_csr_row_normalize_l1,
@@ -2417,7 +2417,7 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
24172417
matrix are discarded to compute the quantile statistics. If False,
24182418
these entries are treated as zeros.
24192419
2420-
subsample : int, default=1e5
2420+
subsample : int, default=10_000
24212421
Maximum number of samples used to estimate the quantiles for
24222422
computational efficiency. Note that the subsampling procedure may
24232423
differ for value-identical sparse and dense matrices.
@@ -2486,13 +2486,22 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
24862486
array([...])
24872487
"""
24882488

2489+
_parameter_constraints = {
2490+
"n_quantiles": [Interval(Integral, 1, None, closed="left")],
2491+
"output_distribution": [StrOptions({"uniform", "normal"})],
2492+
"ignore_implicit_zeros": ["boolean"],
2493+
"subsample": [Interval(Integral, 1, None, closed="left")],
2494+
"random_state": ["random_state"],
2495+
"copy": ["boolean"],
2496+
}
2497+
24892498
def __init__(
24902499
self,
24912500
*,
24922501
n_quantiles=1000,
24932502
output_distribution="uniform",
24942503
ignore_implicit_zeros=False,
2495-
subsample=int(1e5),
2504+
subsample=10_000,
24962505
random_state=None,
24972506
copy=True,
24982507
):
@@ -2599,19 +2608,7 @@ def fit(self, X, y=None):
25992608
self : object
26002609
Fitted transformer.
26012610
"""
2602-
if self.n_quantiles <= 0:
2603-
raise ValueError(
2604-
"Invalid value for 'n_quantiles': %d. "
2605-
"The number of quantiles must be at least one."
2606-
% self.n_quantiles
2607-
)
2608-
2609-
if self.subsample <= 0:
2610-
raise ValueError(
2611-
"Invalid value for 'subsample': %d. "
2612-
"The number of subsamples must be at least one."
2613-
% self.subsample
2614-
)
2611+
self._validate_params()
26152612

26162613
if self.n_quantiles > self.subsample:
26172614
raise ValueError(
@@ -2729,13 +2726,6 @@ def _check_inputs(self, X, in_fit, accept_sparse_negative=False, copy=False):
27292726
"QuantileTransformer only accepts non-negative sparse matrices."
27302727
)
27312728

2732-
# check the output distribution
2733-
if self.output_distribution not in ("normal", "uniform"):
2734-
raise ValueError(
2735-
"'output_distribution' has to be either 'normal'"
2736-
" or 'uniform'. Got '{}' instead.".format(self.output_distribution)
2737-
)
2738-
27392729
return X
27402730

27412731
def _transform(self, X, inverse=False):
@@ -3055,6 +3045,12 @@ class PowerTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
30553045
[ 1.106... 1.414...]]
30563046
"""
30573047

3048+
_parameter_constraints = {
3049+
"method": [StrOptions({"yeo-johnson", "box-cox"})],
3050+
"standardize": ["boolean"],
3051+
"copy": ["boolean"],
3052+
}
3053+
30583054
def __init__(self, method="yeo-johnson", *, standardize=True, copy=True):
30593055
self.method = method
30603056
self.standardize = standardize
@@ -3079,6 +3075,7 @@ def fit(self, X, y=None):
30793075
self : object
30803076
Fitted transformer.
30813077
"""
3078+
self._validate_params()
30823079
self._fit(X, y=y, force_transform=False)
30833080
return self
30843081

@@ -3099,10 +3096,11 @@ def fit_transform(self, X, y=None):
30993096
X_new : ndarray of shape (n_samples, n_features)
31003097
Transformed data.
31013098
"""
3099+
self._validate_params()
31023100
return self._fit(X, y, force_transform=True)
31033101

31043102
def _fit(self, X, y=None, force_transform=False):
3105-
X = self._check_input(X, in_fit=True, check_positive=True, check_method=True)
3103+
X = self._check_input(X, in_fit=True, check_positive=True)
31063104

31073105
if not self.copy and not force_transform: # if call from fit()
31083106
X = X.copy() # force copy so that fit does not change X inplace
@@ -3305,9 +3303,7 @@ def _neg_log_likelihood(lmbda):
33053303
# choosing bracket -2, 2 like for boxcox
33063304
return optimize.brent(_neg_log_likelihood, brack=(-2, 2))
33073305

3308-
def _check_input(
3309-
self, X, in_fit, check_positive=False, check_shape=False, check_method=False
3310-
):
3306+
def _check_input(self, X, in_fit, check_positive=False, check_shape=False):
33113307
"""Validate the input before fit and transform.
33123308
33133309
Parameters
@@ -3324,9 +3320,6 @@ def _check_input(
33243320
33253321
check_shape : bool, default=False
33263322
If True, check that n_features matches the length of self.lambdas_
3327-
3328-
check_method : bool, default=False
3329-
If True, check that the transformation method is valid.
33303323
"""
33313324
X = self._validate_data(
33323325
X,
@@ -3353,14 +3346,6 @@ def _check_input(
33533346
)
33543347
)
33553348

3356-
valid_methods = ("box-cox", "yeo-johnson")
3357-
if check_method and self.method not in valid_methods:
3358-
raise ValueError(
3359-
"'method' must be one of {}, got {} instead.".format(
3360-
valid_methods, self.method
3361-
)
3362-
)
3363-
33643349
return X
33653350

33663351
def _more_tags(self):

sklearn/preprocessing/tests/test_data.py

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,12 +1235,6 @@ def test_quantile_transform_check_error():
12351235
)
12361236
X_neg = sparse.csc_matrix(X_neg)
12371237

1238-
err_msg = "Invalid value for 'n_quantiles': 0."
1239-
with pytest.raises(ValueError, match=err_msg):
1240-
QuantileTransformer(n_quantiles=0).fit(X)
1241-
err_msg = "Invalid value for 'subsample': 0."
1242-
with pytest.raises(ValueError, match=err_msg):
1243-
QuantileTransformer(subsample=0).fit(X)
12441238
err_msg = (
12451239
"The number of quantiles cannot be greater than "
12461240
"the number of samples used. Got 1000 quantiles "
@@ -1267,32 +1261,7 @@ def test_quantile_transform_check_error():
12671261
with pytest.raises(ValueError, match=err_msg):
12681262
transformer.inverse_transform(X_bad_feat)
12691263

1270-
transformer = QuantileTransformer(n_quantiles=10, output_distribution="rnd")
1271-
# check that an error is raised at fit time
1272-
err_msg = (
1273-
"'output_distribution' has to be either 'normal' or "
1274-
"'uniform'. Got 'rnd' instead."
1275-
)
1276-
with pytest.raises(ValueError, match=err_msg):
1277-
transformer.fit(X)
1278-
# check that an error is raised at transform time
1279-
transformer.output_distribution = "uniform"
1280-
transformer.fit(X)
1281-
X_tran = transformer.transform(X)
1282-
transformer.output_distribution = "rnd"
1283-
err_msg = (
1284-
"'output_distribution' has to be either 'normal' or 'uniform'."
1285-
" Got 'rnd' instead."
1286-
)
1287-
with pytest.raises(ValueError, match=err_msg):
1288-
transformer.transform(X)
1289-
# check that an error is raised at inverse_transform time
1290-
err_msg = (
1291-
"'output_distribution' has to be either 'normal' or 'uniform'."
1292-
" Got 'rnd' instead."
1293-
)
1294-
with pytest.raises(ValueError, match=err_msg):
1295-
transformer.inverse_transform(X_tran)
1264+
transformer = QuantileTransformer(n_quantiles=10).fit(X)
12961265
# check that an error is raised if input is scalar
12971266
with pytest.raises(ValueError, match="Expected 2D array, got scalar array instead"):
12981267
transformer.transform(10)
@@ -2426,16 +2395,6 @@ def test_power_transformer_shape_exception(method):
24262395
pt.inverse_transform(X[:, 0:1])
24272396

24282397

2429-
def test_power_transformer_method_exception():
2430-
pt = PowerTransformer(method="monty-python")
2431-
X = np.abs(X_2d)
2432-
2433-
# An exception should be raised if PowerTransformer.method isn't valid
2434-
bad_method_message = "'method' must be one of"
2435-
with pytest.raises(ValueError, match=bad_method_message):
2436-
pt.fit(X)
2437-
2438-
24392398
def test_power_transformer_lambda_zero():
24402399
pt = PowerTransformer(method="box-cox", standardize=False)
24412400
X = np.abs(X_2d)[:, 0:1]

sklearn/tests/test_common.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,10 +560,8 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator):
560560
"PoissonRegressor",
561561
"PolynomialCountSketch",
562562
"PolynomialFeatures",
563-
"PowerTransformer",
564563
"QuadraticDiscriminantAnalysis",
565564
"QuantileRegressor",
566-
"QuantileTransformer",
567565
"RANSACRegressor",
568566
"RBFSampler",
569567
"RFE",

0 commit comments

Comments
 (0)