@@ -9,7 +9,7 @@


 import warnings
-from numbers import Real
+from numbers import Integral, Real

 import numpy as np
 from scipy import sparse
@@ -24,7 +24,7 @@
     _ClassNamePrefixFeaturesOutMixin,
 )
 from ..utils import check_array
-from ..utils._param_validation import StrOptions
+from ..utils._param_validation import Interval, StrOptions
 from ..utils.extmath import _incremental_mean_and_var, row_norms
 from ..utils.sparsefuncs_fast import (
     inplace_csr_row_normalize_l1,
@@ -2417,7 +2417,7 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
         matrix are discarded to compute the quantile statistics. If False,
         these entries are treated as zeros.

-    subsample : int, default=1e5
+    subsample : int, default=10_000
         Maximum number of samples used to estimate the quantiles for
         computational efficiency. Note that the subsampling procedure may
         differ for value-identical sparse and dense matrices.
@@ -2486,13 +2486,22 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
     array([...])
     """

+    _parameter_constraints = {
+        "n_quantiles": [Interval(Integral, 1, None, closed="left")],
+        "output_distribution": [StrOptions({"uniform", "normal"})],
+        "ignore_implicit_zeros": ["boolean"],
+        "subsample": [Interval(Integral, 1, None, closed="left")],
+        "random_state": ["random_state"],
+        "copy": ["boolean"],
+    }
+
     def __init__(
         self,
         *,
         n_quantiles=1000,
         output_distribution="uniform",
         ignore_implicit_zeros=False,
-        subsample=int(1e5),
+        subsample=10_000,
         random_state=None,
         copy=True,
     ):
@@ -2599,19 +2608,7 @@ def fit(self, X, y=None):
         self : object
             Fitted transformer.
         """
-        if self.n_quantiles <= 0:
-            raise ValueError(
-                "Invalid value for 'n_quantiles': %d. "
-                "The number of quantiles must be at least one."
-                % self.n_quantiles
-            )
-
-        if self.subsample <= 0:
-            raise ValueError(
-                "Invalid value for 'subsample': %d. "
-                "The number of subsamples must be at least one."
-                % self.subsample
-            )
+        self._validate_params()

         if self.n_quantiles > self.subsample:
             raise ValueError(
@@ -2729,13 +2726,6 @@ def _check_inputs(self, X, in_fit, accept_sparse_negative=False, copy=False):
                     "QuantileTransformer only accepts non-negative sparse matrices."
                 )

-        # check the output distribution
-        if self.output_distribution not in ("normal", "uniform"):
-            raise ValueError(
-                "'output_distribution' has to be either 'normal'"
-                " or 'uniform'. Got '{}' instead.".format(self.output_distribution)
-            )
-
         return X

     def _transform(self, X, inverse=False):
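
Note (not part of the patch): with the _parameter_constraints mapping declared above, out-of-range constructor arguments for QuantileTransformer are rejected by the shared _validate_params() machinery at fit time, replacing the hand-written checks removed in the two hunks above. A minimal sketch of the expected behaviour, assuming a scikit-learn build that includes this branch; the raised exception is a ValueError subclass, so catching ValueError works both before and after the change:

    import numpy as np
    from sklearn.preprocessing import QuantileTransformer

    X = np.random.RandomState(0).normal(size=(100, 3))

    # Valid parameters fit exactly as before.
    QuantileTransformer(n_quantiles=10, subsample=50).fit(X)

    # n_quantiles=0 violates Interval(Integral, 1, None, closed="left") and is
    # now rejected by _validate_params() instead of the removed ad-hoc check.
    try:
        QuantileTransformer(n_quantiles=0).fit(X)
    except ValueError as exc:
        print(exc)
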
@@ -3055,6 +3045,12 @@ class PowerTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     [ 1.106...  1.414...]]
     """

+    _parameter_constraints = {
+        "method": [StrOptions({"yeo-johnson", "box-cox"})],
+        "standardize": ["boolean"],
+        "copy": ["boolean"],
+    }
+
     def __init__(self, method="yeo-johnson", *, standardize=True, copy=True):
         self.method = method
         self.standardize = standardize
@@ -3079,6 +3075,7 @@ def fit(self, X, y=None):
         self : object
             Fitted transformer.
         """
+        self._validate_params()
         self._fit(X, y=y, force_transform=False)
         return self

@@ -3099,10 +3096,11 @@ def fit_transform(self, X, y=None):
         X_new : ndarray of shape (n_samples, n_features)
             Transformed data.
         """
+        self._validate_params()
         return self._fit(X, y, force_transform=True)

     def _fit(self, X, y=None, force_transform=False):
-        X = self._check_input(X, in_fit=True, check_positive=True, check_method=True)
+        X = self._check_input(X, in_fit=True, check_positive=True)

         if not self.copy and not force_transform:  # if call from fit()
             X = X.copy()  # force copy so that fit does not change X inplace
@@ -3305,9 +3303,7 @@ def _neg_log_likelihood(lmbda):
         # choosing bracket -2, 2 like for boxcox
         return optimize.brent(_neg_log_likelihood, brack=(-2, 2))

-    def _check_input(
-        self, X, in_fit, check_positive=False, check_shape=False, check_method=False
-    ):
+    def _check_input(self, X, in_fit, check_positive=False, check_shape=False):
         """Validate the input before fit and transform.

         Parameters
@@ -3324,9 +3320,6 @@ def _check_input(

         check_shape : bool, default=False
             If True, check that n_features matches the length of self.lambdas_
-
-        check_method : bool, default=False
-            If True, check that the transformation method is valid.
         """
         X = self._validate_data(
             X,
@@ -3353,14 +3346,6 @@ def _check_input(
                 )
             )

-        valid_methods = ("box-cox", "yeo-johnson")
-        if check_method and self.method not in valid_methods:
-            raise ValueError(
-                "'method' must be one of {}, got {} instead.".format(
-                    valid_methods, self.method
-                )
-            )
-
         return X

     def _more_tags(self):
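
Similarly for PowerTransformer (illustrative sketch, not from the patch): method, standardize, and copy are now validated against the declared constraints when fit or fit_transform calls _validate_params(), which is why the check_method path in _check_input could be dropped above. The error for a bad method remains a ValueError, as with the removed branch:

    import numpy as np
    from sklearn.preprocessing import PowerTransformer

    X = np.random.RandomState(0).lognormal(size=(50, 2))  # strictly positive data

    # Both supported methods still go through the same code path.
    PowerTransformer(method="box-cox").fit_transform(X)
    PowerTransformer(method="yeo-johnson").fit(X)

    # A misspelled method is rejected by the StrOptions constraint.
    try:
        PowerTransformer(method="box-box").fit(X)
    except ValueError as exc:
        print(exc)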