MAINT Use _validate_params in Power and Quantile Transformer (#23672)

Gandagorn · glemaitre · web-flow · commit a63a827e6db3 · 2022-06-28T14:59:34.000+02:00
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
@@ -9,7 +9,7 @@
 
 
 import warnings
-from numbers import Real
+from numbers import Integral, Real
 
 import numpy as np
 from scipy import sparse
@@ -24,7 +24,7 @@
     _ClassNamePrefixFeaturesOutMixin,
 )
 from ..utils import check_array
-from ..utils._param_validation import StrOptions
+from ..utils._param_validation import Interval, StrOptions
 from ..utils.extmath import _incremental_mean_and_var, row_norms
 from ..utils.sparsefuncs_fast import (
     inplace_csr_row_normalize_l1,
@@ -2417,7 +2417,7 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
         matrix are discarded to compute the quantile statistics. If False,
         these entries are treated as zeros.
 
-    subsample : int, default=1e5
+    subsample : int, default=10_000
         Maximum number of samples used to estimate the quantiles for
         computational efficiency. Note that the subsampling procedure may
         differ for value-identical sparse and dense matrices.
@@ -2486,13 +2486,22 @@ class QuantileTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator
     array([...])
     """
 
+    _parameter_constraints = {
+        "n_quantiles": [Interval(Integral, 1, None, closed="left")],
+        "output_distribution": [StrOptions({"uniform", "normal"})],
+        "ignore_implicit_zeros": ["boolean"],
+        "subsample": [Interval(Integral, 1, None, closed="left")],
+        "random_state": ["random_state"],
+        "copy": ["boolean"],
+    }
+
     def __init__(
         self,
         *,
         n_quantiles=1000,
         output_distribution="uniform",
         ignore_implicit_zeros=False,
-        subsample=int(1e5),
+        subsample=10_000,
         random_state=None,
         copy=True,
     ):
@@ -2599,19 +2608,7 @@ def fit(self, X, y=None):
         self : object
            Fitted transformer.
         """
-        if self.n_quantiles <= 0:
-            raise ValueError(
-                "Invalid value for 'n_quantiles': %d. "
-                "The number of quantiles must be at least one."
-                % self.n_quantiles
-            )
-
-        if self.subsample <= 0:
-            raise ValueError(
-                "Invalid value for 'subsample': %d. "
-                "The number of subsamples must be at least one."
-                % self.subsample
-            )
+        self._validate_params()
 
         if self.n_quantiles > self.subsample:
             raise ValueError(
@@ -2729,13 +2726,6 @@ def _check_inputs(self, X, in_fit, accept_sparse_negative=False, copy=False):
                     "QuantileTransformer only accepts non-negative sparse matrices."
                 )
 
-        # check the output distribution
-        if self.output_distribution not in ("normal", "uniform"):
-            raise ValueError(
-                "'output_distribution' has to be either 'normal'"
-                " or 'uniform'. Got '{}' instead.".format(self.output_distribution)
-            )
-
         return X
 
     def _transform(self, X, inverse=False):
@@ -3055,6 +3045,12 @@ class PowerTransformer(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
      [ 1.106...  1.414...]]
     """
 
+    _parameter_constraints = {
+        "method": [StrOptions({"yeo-johnson", "box-cox"})],
+        "standardize": ["boolean"],
+        "copy": ["boolean"],
+    }
+
     def __init__(self, method="yeo-johnson", *, standardize=True, copy=True):
         self.method = method
         self.standardize = standardize
@@ -3079,6 +3075,7 @@ def fit(self, X, y=None):
         self : object
             Fitted transformer.
         """
+        self._validate_params()
         self._fit(X, y=y, force_transform=False)
         return self
 
@@ -3099,10 +3096,11 @@ def fit_transform(self, X, y=None):
         X_new : ndarray of shape (n_samples, n_features)
             Transformed data.
         """
+        self._validate_params()
         return self._fit(X, y, force_transform=True)
 
     def _fit(self, X, y=None, force_transform=False):
-        X = self._check_input(X, in_fit=True, check_positive=True, check_method=True)
+        X = self._check_input(X, in_fit=True, check_positive=True)
 
         if not self.copy and not force_transform:  # if call from fit()
             X = X.copy()  # force copy so that fit does not change X inplace
@@ -3305,9 +3303,7 @@ def _neg_log_likelihood(lmbda):
         # choosing bracket -2, 2 like for boxcox
         return optimize.brent(_neg_log_likelihood, brack=(-2, 2))
 
-    def _check_input(
-        self, X, in_fit, check_positive=False, check_shape=False, check_method=False
-    ):
+    def _check_input(self, X, in_fit, check_positive=False, check_shape=False):
         """Validate the input before fit and transform.
 
         Parameters
@@ -3324,9 +3320,6 @@ def _check_input(
 
         check_shape : bool, default=False
             If True, check that n_features matches the length of self.lambdas_
-
-        check_method : bool, default=False
-            If True, check that the transformation method is valid.
         """
         X = self._validate_data(
             X,
@@ -3353,14 +3346,6 @@ def _check_input(
                 )
             )
 
-        valid_methods = ("box-cox", "yeo-johnson")
-        if check_method and self.method not in valid_methods:
-            raise ValueError(
-                "'method' must be one of {}, got {} instead.".format(
-                    valid_methods, self.method
-                )
-            )
-
         return X
 
     def _more_tags(self):
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
@@ -1235,12 +1235,6 @@ def test_quantile_transform_check_error():
     )
     X_neg = sparse.csc_matrix(X_neg)
 
-    err_msg = "Invalid value for 'n_quantiles': 0."
-    with pytest.raises(ValueError, match=err_msg):
-        QuantileTransformer(n_quantiles=0).fit(X)
-    err_msg = "Invalid value for 'subsample': 0."
-    with pytest.raises(ValueError, match=err_msg):
-        QuantileTransformer(subsample=0).fit(X)
     err_msg = (
         "The number of quantiles cannot be greater than "
         "the number of samples used. Got 1000 quantiles "
@@ -1267,32 +1261,7 @@ def test_quantile_transform_check_error():
     with pytest.raises(ValueError, match=err_msg):
         transformer.inverse_transform(X_bad_feat)
 
-    transformer = QuantileTransformer(n_quantiles=10, output_distribution="rnd")
-    # check that an error is raised at fit time
-    err_msg = (
-        "'output_distribution' has to be either 'normal' or "
-        "'uniform'. Got 'rnd' instead."
-    )
-    with pytest.raises(ValueError, match=err_msg):
-        transformer.fit(X)
-    # check that an error is raised at transform time
-    transformer.output_distribution = "uniform"
-    transformer.fit(X)
-    X_tran = transformer.transform(X)
-    transformer.output_distribution = "rnd"
-    err_msg = (
-        "'output_distribution' has to be either 'normal' or 'uniform'."
-        " Got 'rnd' instead."
-    )
-    with pytest.raises(ValueError, match=err_msg):
-        transformer.transform(X)
-    # check that an error is raised at inverse_transform time
-    err_msg = (
-        "'output_distribution' has to be either 'normal' or 'uniform'."
-        " Got 'rnd' instead."
-    )
-    with pytest.raises(ValueError, match=err_msg):
-        transformer.inverse_transform(X_tran)
+    transformer = QuantileTransformer(n_quantiles=10).fit(X)
     # check that an error is raised if input is scalar
     with pytest.raises(ValueError, match="Expected 2D array, got scalar array instead"):
         transformer.transform(10)
@@ -2426,16 +2395,6 @@ def test_power_transformer_shape_exception(method):
         pt.inverse_transform(X[:, 0:1])
 
 
-def test_power_transformer_method_exception():
-    pt = PowerTransformer(method="monty-python")
-    X = np.abs(X_2d)
-
-    # An exception should be raised if PowerTransformer.method isn't valid
-    bad_method_message = "'method' must be one of"
-    with pytest.raises(ValueError, match=bad_method_message):
-        pt.fit(X)
-
-
 def test_power_transformer_lambda_zero():
     pt = PowerTransformer(method="box-cox", standardize=False)
     X = np.abs(X_2d)[:, 0:1]
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
@@ -560,10 +560,8 @@ def test_estimators_do_not_raise_errors_in_init_or_set_params(Estimator):
     "PoissonRegressor",
     "PolynomialCountSketch",
     "PolynomialFeatures",
-    "PowerTransformer",
     "QuadraticDiscriminantAnalysis",
     "QuantileRegressor",
-    "QuantileTransformer",
     "RANSACRegressor",
     "RBFSampler",
     "RFE",