diff --git a/sklearn/base.py b/sklearn/base.py index 5cced34d4b8f0..13bbcab96aa61 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -27,7 +27,7 @@ from .utils.validation import _num_features from .utils.validation import _check_feature_names_in from .utils.validation import _generate_get_feature_names_out -from .utils.validation import check_is_fitted +from .utils.validation import _is_fitted, check_is_fitted from .utils._metadata_requests import _MetadataRequester from .utils.validation import _get_feature_names from .utils._estimator_html_repr import estimator_html_repr @@ -1131,7 +1131,13 @@ def decorator(fit_method): @functools.wraps(fit_method) def wrapper(estimator, *args, **kwargs): global_skip_validation = get_config()["skip_parameter_validation"] - if not global_skip_validation: + + # we don't want to validate again for each call to partial_fit + partial_fit_and_fitted = ( + fit_method.__name__ == "partial_fit" and _is_fitted(estimator) + ) + + if not global_skip_validation and not partial_fit_and_fitted: estimator._validate_params() with config_context( diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 5e7bfe2ab4a31..e4869387f4166 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -25,6 +25,7 @@ RegressorMixin, clone, MetaEstimatorMixin, + _fit_context, ) from .preprocessing import label_binarize, LabelEncoder from .utils import ( @@ -318,6 +319,10 @@ def _get_estimator(self): return estimator + @_fit_context( + # CalibratedClassifierCV.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None, **fit_params): """Fit the calibrated model. @@ -341,8 +346,6 @@ def fit(self, X, y, sample_weight=None, **fit_params): self : object Returns an instance of self. 
""" - self._validate_params() - check_classification_targets(y) X, y = indexable(X, y) if sample_weight is not None: diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 8a3c2c2acde62..1ffc5f07e8c50 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -12,6 +12,7 @@ from ..exceptions import ConvergenceWarning from ..base import BaseEstimator, ClusterMixin +from ..base import _fit_context from ..utils import check_random_state from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.validation import check_is_fitted @@ -469,6 +470,7 @@ def __init__( def _more_tags(self): return {"pairwise": self.affinity == "precomputed"} + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the clustering from features, or affinity matrix. @@ -488,8 +490,6 @@ def fit(self, X, y=None): self Returns the instance itself. """ - self._validate_params() - if self.affinity == "precomputed": accept_sparse = False else: diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 059056275ef3d..b7d08a45dcd80 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -16,6 +16,7 @@ from scipy.sparse.csgraph import connected_components from ..base import BaseEstimator, ClusterMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..metrics.pairwise import paired_distances from ..metrics.pairwise import _VALID_METRICS from ..metrics import DistanceMetric @@ -950,6 +951,7 @@ def __init__( self.metric = metric self.compute_distances = compute_distances + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the hierarchical clustering from features, or distance matrix. @@ -968,7 +970,6 @@ def fit(self, X, y=None): self : object Returns the fitted instance. 
""" - self._validate_params() X = self._validate_data(X, ensure_min_samples=2) return self._fit(X) @@ -1324,6 +1325,7 @@ def __init__( ) self.pooling_func = pooling_func + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the hierarchical clustering on the data. @@ -1340,7 +1342,6 @@ def fit(self, X, y=None): self : object Returns the transformer. """ - self._validate_params() X = self._validate_data(X, ensure_min_features=2) super()._fit(X.T) self._n_features_out = self.n_clusters_ diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index ba837bacc99d5..4133264626ebb 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -13,6 +13,7 @@ from . import KMeans, MiniBatchKMeans from ..base import BaseEstimator, BiclusterMixin +from ..base import _fit_context from ..utils import check_random_state from ..utils import check_scalar @@ -118,6 +119,7 @@ def __init__( def _check_parameters(self, n_samples): """Validate parameters depending on the input data.""" + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Create a biclustering for X. @@ -134,8 +136,6 @@ def fit(self, X, y=None): self : object SpectralBiclustering instance. """ - self._validate_params() - X = self._validate_data(X, accept_sparse="csr", dtype=np.float64) self._check_parameters(X.shape[0]) self._fit(X) diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 4c9d7921fdc70..e74630572a014 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -16,6 +16,7 @@ ClusterMixin, BaseEstimator, ClassNamePrefixFeaturesOutMixin, + _fit_context, ) from ..utils.extmath import row_norms from ..utils._param_validation import Interval @@ -501,6 +502,7 @@ def __init__( self.compute_labels = compute_labels self.copy = copy + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """ Build a CF Tree for the input data. 
@@ -518,9 +520,6 @@ def fit(self, X, y=None): self Fitted estimator. """ - - self._validate_params() - return self._fit(X, partial=False) def _fit(self, X, partial): @@ -610,6 +609,7 @@ def _get_leaves(self): leaf_ptr = leaf_ptr.next_leaf_ return leaves + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X=None, y=None): """ Online learning. Prevents rebuilding of CFTree from scratch. @@ -629,8 +629,6 @@ def partial_fit(self, X=None, y=None): self Fitted estimator. """ - self._validate_params() - if X is None: # Perform just the final global clustering step. self._global_clustering() diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index fc2b38cc1bca9..959d78ae85009 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -6,6 +6,7 @@ import numpy as np import scipy.sparse as sp +from ..base import _fit_context from ._kmeans import _BaseKMeans from ._kmeans import _kmeans_single_elkan from ._kmeans import _kmeans_single_lloyd @@ -347,6 +348,7 @@ def _bisect(self, X, x_squared_norms, sample_weight, cluster_to_bisect): cluster_to_bisect.split(best_labels, best_centers, scores) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, sample_weight=None): """Compute bisecting k-means clustering. @@ -373,8 +375,6 @@ def fit(self, X, y=None, sample_weight=None): self Fitted estimator. 
""" - self._validate_params() - X = self._validate_data( X, accept_sparse="csr", diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index aa81ef27702e6..3c753935ac046 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -16,6 +16,7 @@ from ..metrics.pairwise import _VALID_METRICS from ..base import BaseEstimator, ClusterMixin +from ..base import _fit_context from ..utils.validation import _check_sample_weight from ..utils._param_validation import Interval, StrOptions from ..neighbors import NearestNeighbors @@ -338,6 +339,10 @@ def __init__( self.p = p self.n_jobs = n_jobs + @_fit_context( + # DBSCAN.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None, sample_weight=None): """Perform DBSCAN clustering from features, or distance matrix. @@ -363,8 +368,6 @@ def fit(self, X, y=None, sample_weight=None): self : object Returns a fitted instance of self. """ - self._validate_params() - X = self._validate_data(X, accept_sparse="csr") if sample_weight is not None: diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index 971d5735fbe2b..b36999885a14e 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -23,6 +23,7 @@ ClusterMixin, TransformerMixin, ClassNamePrefixFeaturesOutMixin, + _fit_context, ) from ..metrics.pairwise import euclidean_distances from ..metrics.pairwise import _euclidean_distances @@ -1448,6 +1449,7 @@ def _warn_mkl_vcomp(self, n_active_threads): f" variable OMP_NUM_THREADS={n_active_threads}." ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, sample_weight=None): """Compute k-means clustering. @@ -1475,8 +1477,6 @@ def fit(self, X, y=None, sample_weight=None): self : object Fitted estimator. 
""" - self._validate_params() - X = self._validate_data( X, accept_sparse="csr", @@ -2057,6 +2057,7 @@ def _random_reassign(self): return True return False + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, sample_weight=None): """Compute the centroids on X by chunking it into mini-batches. @@ -2084,8 +2085,6 @@ def fit(self, X, y=None, sample_weight=None): self : object Fitted estimator. """ - self._validate_params() - X = self._validate_data( X, accept_sparse="csr", @@ -2214,6 +2213,7 @@ def fit(self, X, y=None, sample_weight=None): return self + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None, sample_weight=None): """Update k means estimate on a single mini-batch X. @@ -2241,9 +2241,6 @@ def partial_fit(self, X, y=None, sample_weight=None): """ has_centers = hasattr(self, "cluster_centers_") - if not has_centers: - self._validate_params() - X = self._validate_data( X, accept_sparse="csr", diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index 46a00ed3f0740..6b0f227d011f9 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -24,6 +24,7 @@ from ..utils.parallel import delayed, Parallel from ..utils import check_random_state, gen_batches, check_array from ..base import BaseEstimator, ClusterMixin +from ..base import _fit_context from ..neighbors import NearestNeighbors from ..metrics.pairwise import pairwise_distances_argmin from .._config import config_context @@ -435,6 +436,7 @@ def __init__( self.n_jobs = n_jobs self.max_iter = max_iter + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Perform clustering. @@ -451,7 +453,6 @@ def fit(self, X, y=None): self : object Fitted instance. 
""" - self._validate_params() X = self._validate_data(X) bandwidth = self.bandwidth if bandwidth is None: diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 0f1c66ada2d4e..ca1c74d6f44e7 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -24,6 +24,7 @@ from ..utils.validation import check_memory from ..neighbors import NearestNeighbors from ..base import BaseEstimator, ClusterMixin +from ..base import _fit_context from ..metrics import pairwise_distances from scipy.sparse import issparse, SparseEfficiencyWarning @@ -288,6 +289,10 @@ def __init__( self.memory = memory self.n_jobs = n_jobs + @_fit_context( + # Optics.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Perform OPTICS clustering. @@ -311,8 +316,6 @@ def fit(self, X, y=None): self : object Returns a fitted instance of self. """ - self._validate_params() - dtype = bool if self.metric in PAIRWISE_BOOLEAN_FUNCTIONS else float if dtype == bool and X.dtype != bool: msg = ( diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index e0ab7da938bfd..f72db4b7c1da3 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -15,6 +15,7 @@ from scipy.sparse import csc_matrix from ..base import BaseEstimator, ClusterMixin +from ..base import _fit_context from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils import check_random_state, as_float_array from ..metrics.pairwise import pairwise_kernels, KERNEL_PARAMS @@ -649,6 +650,7 @@ def __init__( self.n_jobs = n_jobs self.verbose = verbose + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Perform spectral clustering from features, or affinity matrix. @@ -671,8 +673,6 @@ def fit(self, X, y=None): self : object A fitted instance of the estimator. 
""" - self._validate_params() - X = self._validate_data( X, accept_sparse=["csr", "csc", "coo"], diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 4eddd12b1baea..f5dd7ba20e860 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -14,6 +14,7 @@ from scipy import sparse from ..base import clone, TransformerMixin +from ..base import _fit_context from ..utils._estimator_html_repr import _VisualBlock from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer @@ -697,12 +698,15 @@ def fit(self, X, y=None): self : ColumnTransformer This estimator. """ - self._validate_params() # we use fit_transform to make sure to set sparse_output_ (for which we # need the transformed data) to have consistent output type in predict self.fit_transform(X, y=y) return self + @_fit_context( + # estimators in ColumnTransformer.transformers are not validated yet + prefer_skip_nested_validation=False + ) def fit_transform(self, X, y=None): """Fit all transformers, transform the data and concatenate results. @@ -724,7 +728,6 @@ def fit_transform(self, X, y=None): any result is a sparse matrix, everything will be converted to sparse matrices. """ - self._validate_params() self._check_feature_names(X, reset=True) X = _check_X(X) diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index f31a5a49b641e..e926ed7abe324 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -7,6 +7,7 @@ import numpy as np from ..base import BaseEstimator, RegressorMixin, clone +from ..base import _fit_context from ..utils.validation import check_is_fitted from ..utils._tags import _safe_tags from ..utils import check_array, _safe_indexing @@ -197,6 +198,10 @@ def _fit_transformer(self, y): UserWarning, ) + @_fit_context( + # TransformedTargetRegressor.regressor/transformer are not validated yet. 
+ prefer_skip_nested_validation=False + ) def fit(self, X, y, **fit_params): """Fit the model according to the given training data. @@ -218,7 +223,6 @@ def fit(self, X, y, **fit_params): self : object Fitted estimator. """ - self._validate_params() if y is None: raise ValueError( f"This {self.__class__.__name__} estimator " diff --git a/sklearn/covariance/_elliptic_envelope.py b/sklearn/covariance/_elliptic_envelope.py index 1ef0eedd62f64..c99f200592580 100644 --- a/sklearn/covariance/_elliptic_envelope.py +++ b/sklearn/covariance/_elliptic_envelope.py @@ -9,6 +9,7 @@ from ..utils.validation import check_is_fitted from ..metrics import accuracy_score from ..base import OutlierMixin +from ..base import _fit_context class EllipticEnvelope(OutlierMixin, MinCovDet): @@ -162,6 +163,7 @@ def __init__( ) self.contamination = contamination + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the EllipticEnvelope model. @@ -178,7 +180,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - # `_validate_params` is called in `MinCovDet` super().fit(X) self.offset_ = np.percentile(-self.dist_, 100.0 * self.contamination) return self diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py index 7fc23f36d92d3..8083bfd2e1aa1 100644 --- a/sklearn/covariance/_empirical_covariance.py +++ b/sklearn/covariance/_empirical_covariance.py @@ -16,6 +16,7 @@ from .. import config_context from ..base import BaseEstimator +from ..base import _fit_context from ..utils import check_array from ..utils._param_validation import validate_params from ..utils.extmath import fast_logdet @@ -218,6 +219,7 @@ def get_precision(self): precision = linalg.pinvh(self.covariance_, check_finite=False) return precision + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the maximum likelihood covariance estimator to X. 
@@ -235,7 +237,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() X = self._validate_data(X) if self.assume_centered: self.location_ = np.zeros(X.shape[1]) diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py index afe21fa3a02f1..8575cc4f75801 100644 --- a/sklearn/covariance/_graph_lasso.py +++ b/sklearn/covariance/_graph_lasso.py @@ -16,6 +16,7 @@ from . import empirical_covariance, EmpiricalCovariance, log_likelihood +from ..base import _fit_context from ..exceptions import ConvergenceWarning from ..utils.validation import ( _is_arraylike_not_scalar, @@ -532,6 +533,7 @@ def __init__( self.alpha = alpha self.covariance = covariance + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the GraphicalLasso model to X. @@ -548,7 +550,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() # Covariance does not make sense for a single feature X = self._validate_data(X, ensure_min_features=2, ensure_min_samples=2) @@ -925,6 +926,7 @@ def __init__( self.cv = cv self.n_jobs = n_jobs + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the GraphicalLasso covariance model to X. @@ -941,7 +943,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() # Covariance does not make sense for a single feature X = self._validate_data(X, ensure_min_features=2) if self.assume_centered: diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py index f3dd6d60badf8..c723bba7a097b 100644 --- a/sklearn/covariance/_robust_covariance.py +++ b/sklearn/covariance/_robust_covariance.py @@ -15,6 +15,7 @@ from scipy.stats import chi2 from . 
import empirical_covariance, EmpiricalCovariance +from ..base import _fit_context from ..utils.extmath import fast_logdet from ..utils import check_random_state, check_array from ..utils._param_validation import Interval @@ -719,6 +720,7 @@ def __init__( self.support_fraction = support_fraction self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit a Minimum Covariance Determinant with the FastMCD algorithm. @@ -736,7 +738,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() X = self._validate_data(X, ensure_min_samples=2, estimator="MinCovDet") random_state = check_random_state(self.random_state) n_samples, n_features = X.shape diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index 4bf3d9a490b6b..21d2e034b45d7 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -18,6 +18,7 @@ import numpy as np from . import empirical_covariance, EmpiricalCovariance +from ..base import _fit_context from ..utils import check_array from ..utils._param_validation import Interval, validate_params @@ -237,6 +238,7 @@ def __init__(self, *, store_precision=True, assume_centered=False, shrinkage=0.1 ) self.shrinkage = shrinkage + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the shrunk covariance model to X. @@ -254,7 +256,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() X = self._validate_data(X) # Not calling the parent object to fit, to avoid a potential # matrix inversion when setting the precision @@ -533,6 +534,7 @@ def __init__(self, *, store_precision=True, assume_centered=False, block_size=10 ) self.block_size = block_size + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the Ledoit-Wolf shrunk covariance model to X. 
@@ -549,7 +551,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() # Not calling the parent object to fit, to avoid computing the # covariance matrix (and potentially the precision) X = self._validate_data(X) @@ -722,6 +723,7 @@ class OAS(EmpiricalCovariance): 0.0195... """ + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the Oracle Approximating Shrinkage covariance model to X. @@ -738,8 +740,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() - X = self._validate_data(X) # Not calling the parent object to fit, to avoid computing the # covariance matrix (and potentially the precision) diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index a5e5a1ceff09a..da395d8f060fb 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -16,6 +16,7 @@ from ..base import BaseEstimator, RegressorMixin, TransformerMixin from ..base import MultiOutputMixin from ..base import ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..utils import check_array, check_consistent_length from ..utils.fixes import sp_version from ..utils.fixes import parse_version @@ -208,6 +209,7 @@ def __init__( self.tol = tol self.copy = copy + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, Y): """Fit model to data. @@ -226,8 +228,6 @@ def fit(self, X, Y): self : object Fitted model. """ - self._validate_params() - check_consistent_length(X, Y) X = self._validate_data( X, dtype=np.float64, copy=self.copy, ensure_min_samples=2 @@ -958,6 +958,7 @@ def __init__(self, n_components=2, *, scale=True, copy=True): self.scale = scale self.copy = copy + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, Y): """Fit model to data. @@ -974,8 +975,6 @@ def fit(self, X, Y): self : object Fitted estimator. 
""" - self._validate_params() - check_consistent_length(X, Y) X = self._validate_data( X, dtype=np.float64, copy=self.copy, ensure_min_samples=2 diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 0e17f745dc6e9..819347b27e879 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1796,6 +1796,7 @@ def fit(self, X, y=None): self.fit_transform(X) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Fit the model from data in X and return the transformed data. @@ -1813,8 +1814,6 @@ def fit_transform(self, X, y=None): V : ndarray of shape (n_samples, n_components) Transformed data. """ - self._validate_params() - _check_positive_coding(method=self.fit_algorithm, positive=self.positive_code) method = "lasso_" + self.fit_algorithm @@ -2435,6 +2434,7 @@ def fit(self, X, y=None): return self + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None): """Update the model using the data in X as a mini-batch. 
@@ -2454,9 +2454,6 @@ def partial_fit(self, X, y=None): """ has_components = hasattr(self, "components_") - if not has_components: - self._validate_params() - X = self._validate_data( X, dtype=[np.float64, np.float32], order="C", reset=not has_components ) diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py index a6507d167b9cb..8c3d590b2c814 100644 --- a/sklearn/decomposition/_factor_analysis.py +++ b/sklearn/decomposition/_factor_analysis.py @@ -27,6 +27,7 @@ from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..utils import check_random_state from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import fast_logdet, randomized_svd, squared_norm @@ -197,6 +198,7 @@ def __init__( self.random_state = random_state self.rotation = rotation + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the FactorAnalysis model to X using SVD based approach. @@ -213,8 +215,6 @@ def fit(self, X, y=None): self : object FactorAnalysis class instance. """ - self._validate_params() - X = self._validate_data(X, copy=self.copy, dtype=np.float64) n_samples, n_features = X.shape diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index 680a6cd8bbee1..6dcf62c0ace3b 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -16,6 +16,7 @@ from scipy import linalg from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..exceptions import ConvergenceWarning from ..utils import check_array, as_float_array, check_random_state from ..utils.validation import check_is_fitted @@ -672,6 +673,7 @@ def g(x, fun_args): return S + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Fit the model and recover the sources from X. 
@@ -690,10 +692,9 @@ def fit_transform(self, X, y=None): Estimated sources obtained by transforming the data with the estimated unmixing matrix. """ - self._validate_params() - return self._fit_transform(X, compute_sources=True) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model to X. @@ -711,8 +712,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() - self._fit_transform(X, compute_sources=False) return self diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py index d98a5f4fb3b7a..5ae5d58b06ca4 100644 --- a/sklearn/decomposition/_incremental_pca.py +++ b/sklearn/decomposition/_incremental_pca.py @@ -9,6 +9,7 @@ from scipy import linalg, sparse from ._base import _BasePCA +from ..base import _fit_context from ..utils import gen_batches from ..utils._param_validation import Interval from ..utils.extmath import svd_flip, _incremental_mean_and_var @@ -192,6 +193,7 @@ def __init__(self, n_components=None, *, whiten=False, copy=True, batch_size=Non self.copy = copy self.batch_size = batch_size + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model with X, using minibatches of size batch_size. @@ -209,8 +211,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() - self.components_ = None self.n_samples_seen_ = 0 self.mean_ = 0.0 @@ -243,6 +243,7 @@ def fit(self, X, y=None): return self + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None, check_input=True): """Incremental fit with X. All of X is processed as a single batch. 
@@ -265,9 +266,6 @@ def partial_fit(self, X, y=None, check_input=True): """ first_pass = not hasattr(self, "components_") - if first_pass: - self._validate_params() - if check_input: if sparse.issparse(X): raise TypeError( diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py index fadcd6f94a2f8..61d502a006c5e 100644 --- a/sklearn/decomposition/_kernel_pca.py +++ b/sklearn/decomposition/_kernel_pca.py @@ -19,6 +19,7 @@ from ..utils._param_validation import Interval, StrOptions from ..exceptions import NotFittedError from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..preprocessing import KernelCenterer from ..metrics.pairwise import pairwise_kernels @@ -404,6 +405,7 @@ def _fit_inverse_transform(self, X_transformed, X): self.dual_coef_ = linalg.solve(K, X, assume_a="pos", overwrite_a=True) self.X_transformed_fit_ = X_transformed + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model from data in X. @@ -421,8 +423,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. 
""" - self._validate_params() - if self.fit_inverse_transform and self.kernel == "precomputed": raise ValueError("Cannot fit_inverse_transform with a precomputed kernel.") X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X) diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py index 21829d4fedab3..ab1ea5ebb5460 100644 --- a/sklearn/decomposition/_lda.py +++ b/sklearn/decomposition/_lda.py @@ -18,6 +18,7 @@ from joblib import effective_n_jobs from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..utils import check_random_state, gen_batches, gen_even_slices from ..utils.validation import check_non_negative from ..utils.validation import check_is_fitted @@ -568,6 +569,7 @@ def _check_non_neg_array(self, X, reset_n_features, whom): return X + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None): """Online VB with Mini-Batch update. @@ -586,9 +588,6 @@ def partial_fit(self, X, y=None): """ first_time = not hasattr(self, "components_") - if first_time: - self._validate_params() - X = self._check_non_neg_array( X, reset_n_features=first_time, whom="LatentDirichletAllocation.partial_fit" ) @@ -618,6 +617,7 @@ def partial_fit(self, X, y=None): return self + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Learn model for the data X with variational Bayes method. @@ -637,7 +637,6 @@ def fit(self, X, y=None): self Fitted estimator. 
""" - self._validate_params() X = self._check_non_neg_array( X, reset_n_features=True, whom="LatentDirichletAllocation.fit" ) diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 67dd0c2ab7b70..6872f810193f0 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -19,6 +19,7 @@ from ._cdnmf_fast import _update_cdnmf_fast from .._config import config_context from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..exceptions import ConvergenceWarning from ..utils import check_random_state, check_array, gen_batches from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm @@ -1539,6 +1540,7 @@ def _check_params(self, X): return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None, W=None, H=None): """Learn a NMF model for the data X and returns the transformed data. @@ -1566,8 +1568,6 @@ def fit_transform(self, X, y=None, W=None, H=None): W : ndarray of shape (n_samples, n_components) Transformed data. """ - self._validate_params() - X = self._validate_data( X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32] ) @@ -2123,6 +2123,7 @@ def _minibatch_convergence( return False + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None, W=None, H=None): """Learn a NMF model for the data X and returns the transformed data. @@ -2149,8 +2150,6 @@ def fit_transform(self, X, y=None, W=None, H=None): W : ndarray of shape (n_samples, n_components) Transformed data. """ - self._validate_params() - X = self._validate_data( X, accept_sparse=("csr", "csc"), dtype=[np.float64, np.float32] ) @@ -2288,6 +2287,7 @@ def transform(self, X): return W + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None, W=None, H=None): """Update the model using the data in `X` as a mini-batch. 
@@ -2321,9 +2321,6 @@ def partial_fit(self, X, y=None, W=None, H=None): """ has_components = hasattr(self, "components_") - if not has_components: - self._validate_params() - X = self._validate_data( X, accept_sparse=("csr", "csc"), diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py index e8c302fc47129..1d3c0678aca89 100644 --- a/sklearn/decomposition/_pca.py +++ b/sklearn/decomposition/_pca.py @@ -20,6 +20,7 @@ from scipy.sparse.linalg import svds from ._base import _BasePCA +from ..base import _fit_context from ..utils import check_random_state from ..utils._arpack import _init_arpack_v0 from ..utils.deprecation import deprecated @@ -414,6 +415,7 @@ def __init__( def n_features_(self): return self.n_features_in_ + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model with X. @@ -431,11 +433,10 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() - self._fit(X) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Fit the model with X and apply the dimensionality reduction on X. @@ -458,8 +459,6 @@ def fit_transform(self, X, y=None): This method returns a Fortran-ordered array. To convert it to a C-ordered array, use 'np.ascontiguousarray'. 
""" - self._validate_params() - U, S, Vt = self._fit(X) U = U[:, : self.n_components_] diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index 5974b86381e1a..93e4a2164a87f 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -12,6 +12,7 @@ from ..utils.validation import check_array, check_is_fitted from ..linear_model import ridge_regression from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ._dict_learning import dict_learning, MiniBatchDictionaryLearning @@ -53,6 +54,7 @@ def __init__( self.verbose = verbose self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model from data in X. @@ -70,7 +72,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() random_state = check_random_state(self.random_state) X = self._validate_data(X) diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py index 999266a4f3f78..67f5c73028f15 100644 --- a/sklearn/decomposition/_truncated_svd.py +++ b/sklearn/decomposition/_truncated_svd.py @@ -12,6 +12,7 @@ from scipy.sparse.linalg import svds from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..utils import check_array, check_random_state from ..utils._arpack import _init_arpack_v0 from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip @@ -200,10 +201,10 @@ def fit(self, X, y=None): self : object Returns the transformer object. """ - # param validation is done in fit_transform self.fit_transform(X) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Fit model to X and perform dimensionality reduction on X. 
@@ -220,7 +221,6 @@ def fit_transform(self, X, y=None): X_new : ndarray of shape (n_samples, n_components) Reduced version of X. This will always be a dense array. """ - self._validate_params() X = self._validate_data(X, accept_sparse=["csr", "csc"], ensure_min_features=2) random_state = check_random_state(self.random_state) diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 066415e3321c5..7a1fa3dd66031 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -17,6 +17,7 @@ from .base import BaseEstimator, TransformerMixin, ClassifierMixin from .base import ClassNamePrefixFeaturesOutMixin +from .base import _fit_context from .linear_model._base import LinearClassifierMixin from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance from .utils.multiclass import unique_labels @@ -546,6 +547,10 @@ def _solve_svd(self, X, y): self.coef_ = coef @ self.scalings_.T self.intercept_ -= self.xbar_ @ self.coef_.T + @_fit_context( + # LinearDiscriminantAnalysis.covariance_estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit the Linear Discriminant Analysis model. @@ -568,8 +573,6 @@ def fit(self, X, y): self : object Fitted estimator. """ - self._validate_params() - xp, _ = get_namespace(X) X, y = self._validate_data( @@ -862,6 +865,7 @@ def __init__( self.store_covariance = store_covariance self.tol = tol + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model according to the given training data and parameters. @@ -886,7 +890,6 @@ def fit(self, X, y): self : object Fitted estimator. 
""" - self._validate_params() X, y = self._validate_data(X, y) check_classification_targets(y) self.classes_, y = np.unique(y, return_inverse=True) diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 25f910e8419f4..0d8519484d7a5 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -11,6 +11,7 @@ from .base import BaseEstimator, ClassifierMixin, RegressorMixin from .base import MultiOutputMixin +from .base import _fit_context from .utils import check_random_state from .utils._param_validation import StrOptions, Interval from .utils.validation import _num_samples @@ -142,6 +143,7 @@ def __init__(self, *, strategy="prior", random_state=None, constant=None): self.random_state = random_state self.constant = constant + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the baseline classifier. @@ -161,8 +163,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns the instance itself. """ - self._validate_params() - self._strategy = self.strategy if self._strategy == "uniform" and sp.issparse(y): @@ -518,6 +518,7 @@ def __init__(self, *, strategy="mean", constant=None, quantile=None): self.constant = constant self.quantile = quantile + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the random regressor. @@ -537,8 +538,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. 
""" - self._validate_params() - y = check_array(y, ensure_2d=False, input_name="y") if len(y) == 0: raise ValueError("y must not be empty.") diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index bad6dcfb033ec..0354413fdebfe 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -14,6 +14,7 @@ from ._base import BaseEnsemble, _partition_estimators from ..base import ClassifierMixin, RegressorMixin +from ..base import _fit_context from ..metrics import r2_score, accuracy_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, column_or_1d @@ -301,6 +302,10 @@ def __init__( self.random_state = random_state self.verbose = verbose + @_fit_context( + # BaseBagging.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None): """Build a Bagging ensemble of estimators from the training set (X, y). @@ -324,9 +329,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. """ - - self._validate_params() - # Convert data (X is required to be 2d and indexable) X, y = self._validate_data( X, diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index f146530460dfa..ce3a6f78b241d 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -51,6 +51,7 @@ class calls the ``fit`` method of each sub-estimator on random samples from ..base import is_classifier from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin +from ..base import _fit_context from ..metrics import accuracy_score, r2_score from ..preprocessing import OneHotEncoder @@ -311,6 +312,7 @@ def decision_path(self, X): return sparse_hstack(indicators).tocsr(), n_nodes_ptr + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """ Build a forest of trees from the training set (X, y). 
@@ -338,8 +340,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. """ - self._validate_params() - # Validate or convert input data if issparse(y): raise ValueError("sparse multilabel-indicator for y is not supported.") @@ -2737,6 +2737,7 @@ def fit(self, X, y=None, sample_weight=None): self.fit_transform(X, y, sample_weight=sample_weight) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None, sample_weight=None): """ Fit estimator and transform dataset. @@ -2762,8 +2763,6 @@ def fit_transform(self, X, y=None, sample_weight=None): X_transformed : sparse matrix of shape (n_samples, n_out) Transformed dataset. """ - self._validate_params() - rnd = check_random_state(self.random_state) y = rnd.uniform(size=_num_samples(X)) super().fit(X, y, sample_weight=sample_weight) diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index df9904c8a9aa4..1b924749f52bd 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -28,6 +28,7 @@ from ._base import BaseEnsemble from ..base import ClassifierMixin, RegressorMixin from ..base import is_classifier +from ..base import _fit_context from ._gradient_boosting import predict_stages from ._gradient_boosting import predict_stage @@ -376,6 +377,10 @@ def _check_initialized(self): """Check that the estimator is initialized, raising an error if not.""" check_is_fitted(self) + @_fit_context( + # GradientBoosting*.init is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None, monitor=None): """Fit the gradient boosting model. @@ -412,8 +417,6 @@ def fit(self, X, y, sample_weight=None, monitor=None): self : object Fitted estimator. 
""" - self._validate_params() - if not self.warm_start: self._clear_state() diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 976335ea684d0..79b640057abe5 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -18,6 +18,7 @@ PinballLoss, ) from ...base import BaseEstimator, RegressorMixin, ClassifierMixin, is_classifier +from ...base import _fit_context from ...utils import check_random_state, resample, compute_sample_weight from ...utils.validation import ( check_is_fitted, @@ -336,6 +337,7 @@ def _check_interaction_cst(self, n_features): return constraints + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the gradient boosting model. @@ -357,8 +359,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. """ - self._validate_params() - fit_start_time = time() acc_find_split_time = 0.0 # time spent finding the best splits acc_apply_split_time = 0.0 # time spent splitting nodes diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py index bb016fa33185b..048a1d69395e2 100644 --- a/sklearn/ensemble/_iforest.py +++ b/sklearn/ensemble/_iforest.py @@ -20,6 +20,7 @@ from ..utils._param_validation import RealNotInt from ..utils.validation import check_is_fitted, _num_samples from ..base import OutlierMixin +from ..base import _fit_context from ._bagging import BaseBagging @@ -265,6 +266,7 @@ def _parallel_args(self): # copies. return {"prefer": "threads"} + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, sample_weight=None): """ Fit estimator. @@ -287,7 +289,6 @@ def fit(self, X, y=None, sample_weight=None): self : object Fitted estimator. 
""" - self._validate_params() X = self._validate_data(X, accept_sparse=["csc"], dtype=tree_dtype) if issparse(X): # Pre-sort indices to avoid that each individual tree of the diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 10f7a606f20c9..5b3486edfeb33 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,6 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor +from ..base import _fit_context from ..exceptions import NotFittedError from ..utils._estimator_html_repr import _VisualBlock @@ -159,6 +160,10 @@ def _method_name(name, estimator, method): return method_name + @_fit_context( + # estimators in Stacking*.estimators are not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None): """Fit the estimators. @@ -184,9 +189,6 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ - - self._validate_params() - # all_estimators contains all estimators, the one to be fitted and the # 'drop' string. names, all_estimators = self._validate_estimators() diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 1c250cbe11a06..f8f4d2c4c197f 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -22,6 +22,7 @@ from ..base import RegressorMixin from ..base import TransformerMixin from ..base import clone +from ..base import _fit_context from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble from ..preprocessing import LabelEncoder @@ -308,6 +309,10 @@ def __init__( self.flatten_transform = flatten_transform self.verbose = verbose + @_fit_context( + # estimators in VotingClassifier.estimators are not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None): """Fit the estimators. 
@@ -332,7 +337,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns the instance itself. """ - self._validate_params() check_classification_targets(y) if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1: raise NotImplementedError( @@ -572,6 +576,10 @@ def __init__(self, estimators, *, weights=None, n_jobs=None, verbose=False): self.n_jobs = n_jobs self.verbose = verbose + @_fit_context( + # estimators in VotingRegressor.estimators are not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None): """Fit the estimators. @@ -594,7 +602,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. """ - self._validate_params() y = column_or_1d(y, warn=True) return super().fit(X, y, sample_weight) diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index b2aff503b0bb0..569609e6326e5 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -34,7 +34,7 @@ from ._base import BaseEnsemble from ..base import ClassifierMixin, RegressorMixin, is_classifier, is_regressor - +from ..base import _fit_context from ..tree import DecisionTreeClassifier, DecisionTreeRegressor from ..utils import check_random_state, _safe_indexing from ..utils.extmath import softmax @@ -103,6 +103,10 @@ def _check_X(self, X): reset=False, ) + @_fit_context( + # AdaBoost*.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None): """Build a boosted classifier/regressor from the training set (X, y). @@ -124,8 +128,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. 
""" - self._validate_params() - X, y = self._validate_data( X, y, diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index b51ccceaac9d1..60e2cb3b7ad84 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -11,6 +11,7 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin +from ..base import _fit_context from ..utils import check_array from ..utils.validation import check_is_fitted @@ -133,6 +134,7 @@ def _add_iterable_element( indices.append(vocab[feature_name]) values.append(self.dtype(vv)) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Learn a list of feature name -> indices mappings. @@ -153,7 +155,6 @@ def fit(self, X, y=None): self : object DictVectorizer class instance. """ - self._validate_params() feature_names = [] vocab = {} @@ -286,6 +287,7 @@ def _transform(self, X, fitting): return result_matrix + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Learn a list of feature name -> indices mappings and transform X. @@ -309,7 +311,6 @@ def fit_transform(self, X, y=None): Xa : {array, sparse matrix} Feature vectors; always 2-d. 
""" - self._validate_params() return self._transform(X, fitting=True) def inverse_transform(self, X, dict_type=dict): diff --git a/sklearn/feature_extraction/_hash.py b/sklearn/feature_extraction/_hash.py index 1f2513e70eed5..e1b5e5f2561fe 100644 --- a/sklearn/feature_extraction/_hash.py +++ b/sklearn/feature_extraction/_hash.py @@ -8,6 +8,7 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin +from ..base import _fit_context from ._hashing_fast import transform as _hashing_transform from ..utils._param_validation import Interval, StrOptions @@ -121,6 +122,7 @@ def __init__( self.n_features = n_features self.alternate_sign = alternate_sign + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X=None, y=None): """Only validates estimator's parameters. @@ -140,8 +142,6 @@ def fit(self, X=None, y=None): self : object FeatureHasher class instance. """ - # repeat input validation for grid search (which calls set_params) - self._validate_params() return self def transform(self, raw_X): diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py index 89bdd7557f583..beea3e23e0adc 100644 --- a/sklearn/feature_extraction/image.py +++ b/sklearn/feature_extraction/image.py @@ -16,6 +16,7 @@ from numpy.lib.stride_tricks import as_strided from ..base import BaseEstimator, TransformerMixin +from ..base import _fit_context from ..utils import check_array, check_random_state from ..utils._param_validation import Hidden, Interval, validate_params from ..utils._param_validation import RealNotInt @@ -561,6 +562,7 @@ def __init__(self, *, patch_size=None, max_patches=None, random_state=None): self.max_patches = max_patches self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Only validate the parameters of the estimator. @@ -583,7 +585,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. 
""" - self._validate_params() return self def transform(self, X): diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 21863d75eff2f..3201e3a0d51bb 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -25,6 +25,7 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin +from ..base import _fit_context from ..preprocessing import normalize from ._hash import FeatureHasher from ._stop_words import ENGLISH_STOP_WORDS @@ -801,6 +802,7 @@ def __init__( self.alternate_sign = alternate_sign self.dtype = dtype + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None): """Only validates estimator's parameters. @@ -820,10 +822,9 @@ def partial_fit(self, X, y=None): self : object HashingVectorizer instance. """ - # TODO: only validate during the first call - self._validate_params() return self + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Only validates estimator's parameters. @@ -843,8 +844,6 @@ def fit(self, X, y=None): self : object HashingVectorizer instance. """ - self._validate_params() - # triggers a parameter validation if isinstance(X, str): raise ValueError( @@ -1338,6 +1337,7 @@ def fit(self, raw_documents, y=None): self.fit_transform(raw_documents) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, raw_documents, y=None): """Learn the vocabulary dictionary and return document-term matrix. @@ -1365,7 +1365,6 @@ def fit_transform(self, raw_documents, y=None): "Iterable over raw text documents expected, string object received." 
) - self._validate_params() self._validate_ngram_range() self._warn_for_unused_params() self._validate_vocabulary() @@ -1639,6 +1638,7 @@ def __init__(self, *, norm="l2", use_idf=True, smooth_idf=True, sublinear_tf=Fal self.smooth_idf = smooth_idf self.sublinear_tf = sublinear_tf + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Learn the idf vector (global term weights). @@ -1655,8 +1655,6 @@ def fit(self, X, y=None): self : object Fitted transformer. """ - self._validate_params() - # large sparse data is not supported for 32bit platforms because # _document_frequency uses np.bincount which works on arrays of # dtype NPY_INTP which is int32 for 32bit platforms. See #20923 @@ -2073,6 +2071,7 @@ def _check_params(self): UserWarning, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, raw_documents, y=None): """Learn vocabulary and idf from training set. @@ -2089,7 +2088,6 @@ def fit(self, raw_documents, y=None): self : object Fitted vectorizer. 
""" - self._validate_params() self._check_params() self._warn_for_unused_params() self._tfidf = TfidfTransformer( diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py index 7b8de4ae03585..47f98d89e8abe 100644 --- a/sklearn/feature_selection/_from_model.py +++ b/sklearn/feature_selection/_from_model.py @@ -9,6 +9,7 @@ from ._base import SelectorMixin from ._base import _get_feature_importances from ..base import BaseEstimator, clone, MetaEstimatorMixin +from ..base import _fit_context from ..utils._tags import _safe_tags from ..utils.validation import check_is_fitted, check_scalar, _num_features from ..utils._param_validation import HasMethods, Interval, Options @@ -320,6 +321,10 @@ def _check_max_features(self, X): ) self.max_features_ = max_features + @_fit_context( + # SelectFromModel.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer. @@ -340,7 +345,6 @@ def fit(self, X, y=None, **fit_params): self : object Fitted estimator. """ - self._validate_params() self._check_max_features(X) if self.prefit: @@ -375,6 +379,10 @@ def threshold_(self): return _calculate_threshold(self.estimator, scores, self.threshold) @available_if(_estimator_has("partial_fit")) + @_fit_context( + # SelectFromModel.estimator is not validated yet + prefer_skip_nested_validation=False + ) def partial_fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer only once. 
@@ -398,7 +406,6 @@ def partial_fit(self, X, y=None, **fit_params): first_call = not hasattr(self, "estimator_") if first_call: - self._validate_params() self._check_max_features(X) if self.prefit: diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 214ac9e0c30cf..932d66449ae22 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -22,6 +22,7 @@ from ..base import MetaEstimatorMixin from ..base import clone from ..base import is_classifier +from ..base import _fit_context from ..model_selection import check_cv from ..model_selection._validation import _score from ..metrics import check_scoring @@ -228,6 +229,10 @@ def classes_(self): """ return self.estimator_.classes_ + @_fit_context( + # RFE.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, **fit_params): """Fit the RFE model and then the underlying estimator on the selected features. @@ -248,7 +253,6 @@ def fit(self, X, y, **fit_params): self : object Fitted estimator. """ - self._validate_params() return self._fit(X, y, **fit_params) def _fit(self, X, y, step_score=None, **fit_params): @@ -649,6 +653,10 @@ def __init__( self.n_jobs = n_jobs self.min_features_to_select = min_features_to_select + @_fit_context( + # RFECV.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, groups=None): """Fit the RFE model and automatically tune the number of selected features. @@ -674,7 +682,6 @@ def fit(self, X, y, groups=None): self : object Fitted estimator. 
""" - self._validate_params() tags = self._get_tags() X, y = self._validate_data( X, diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 32193c3d415c7..df4ffe1337ed3 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -7,6 +7,7 @@ from ._base import SelectorMixin from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier +from ..base import _fit_context from ..utils._param_validation import HasMethods, Interval, StrOptions from ..utils._param_validation import RealNotInt from ..utils._tags import _safe_tags @@ -179,6 +180,10 @@ def __init__( self.cv = cv self.n_jobs = n_jobs + @_fit_context( + # SequentialFeatureSelector.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Learn the features to select from X. @@ -197,8 +202,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() - tags = self._get_tags() X = self._validate_data( X, diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 18e23d105b8bb..f4355c39f88cd 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -13,6 +13,7 @@ from scipy.sparse import issparse from ..base import BaseEstimator +from ..base import _fit_context from ..preprocessing import LabelBinarizer from ..utils import as_float_array, check_array, check_X_y, safe_sqr, safe_mask from ..utils.extmath import safe_sparse_dot, row_norms @@ -473,6 +474,7 @@ class _BaseFilter(SelectorMixin, BaseEstimator): def __init__(self, score_func): self.score_func = score_func + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Run score function on (X, y) and get the appropriate features. @@ -490,8 +492,6 @@ def fit(self, X, y): self : object Returns the instance itself. 
""" - self._validate_params() - X, y = self._validate_data( X, y, accept_sparse=["csr", "csc"], multi_output=True ) diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 7c8db9cc7fa55..073a22c6ad92b 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -4,6 +4,7 @@ import numpy as np from ..base import BaseEstimator +from ..base import _fit_context from ._base import SelectorMixin from ..utils.sparsefuncs import mean_variance_axis, min_max_axis from ..utils.validation import check_is_fitted @@ -76,6 +77,7 @@ class VarianceThreshold(SelectorMixin, BaseEstimator): def __init__(self, threshold=0.0): self.threshold = threshold + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Learn empirical variances from X. @@ -94,7 +96,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() X = self._validate_data( X, accept_sparse=("csr", "csc"), diff --git a/sklearn/gaussian_process/_gpc.py b/sklearn/gaussian_process/_gpc.py index 4a88034768870..50a8739372972 100644 --- a/sklearn/gaussian_process/_gpc.py +++ b/sklearn/gaussian_process/_gpc.py @@ -13,6 +13,7 @@ from scipy.special import erf, expit from ..base import BaseEstimator, ClassifierMixin, clone +from ..base import _fit_context from .kernels import Kernel, RBF, CompoundKernel, ConstantKernel as C from ..utils.validation import check_is_fitted from ..utils import check_random_state @@ -679,6 +680,7 @@ def __init__( self.multi_class = multi_class self.n_jobs = n_jobs + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit Gaussian process classification model. @@ -695,8 +697,6 @@ def fit(self, X, y): self : object Returns an instance of self. 
""" - self._validate_params() - if isinstance(self.kernel, CompoundKernel): raise ValueError("kernel cannot be a CompoundKernel") diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 9b7141f71b884..49fcab40c25f8 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -14,6 +14,7 @@ from ..base import BaseEstimator, RegressorMixin, clone from ..base import MultiOutputMixin +from ..base import _fit_context from .kernels import Kernel, RBF, ConstantKernel as C from ..preprocessing._data import _handle_zeros_in_scale from ..utils import check_random_state @@ -214,6 +215,7 @@ def __init__( self.n_targets = n_targets self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit Gaussian process regression model. @@ -230,8 +232,6 @@ def fit(self, X, y): self : object GaussianProcessRegressor class instance. """ - self._validate_params() - if self.kernel is None: # Use an RBF kernel as default self.kernel_ = C(1.0, constant_value_bounds="fixed") * RBF( 1.0, length_scale_bounds="fixed" diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index b2f296c91740e..37fc43731514a 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -11,6 +11,7 @@ from scipy import sparse as sp from ..base import BaseEstimator, TransformerMixin +from ..base import _fit_context from ..utils._param_validation import StrOptions, MissingValues from ..utils.fixes import _mode from ..utils.sparsefuncs import _get_median @@ -348,6 +349,7 @@ def _validate_input(self, X, in_fit): return X + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the imputer on `X`. @@ -365,8 +367,6 @@ def fit(self, X, y=None): self : object Fitted estimator. 
""" - self._validate_params() - X = self._validate_input(X, in_fit=True) # default fill_value is 0 for numerical input and "missing_value" @@ -927,6 +927,7 @@ def _fit(self, X, y=None, precomputed=False): return missing_features_info[0] + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the transformer on `X`. @@ -944,7 +945,6 @@ def fit(self, X, y=None): self : object Fitted estimator. """ - self._validate_params() self._fit(X, y) return self @@ -990,6 +990,7 @@ def transform(self, X): return imputer_mask + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Generate missing values indicator for `X`. @@ -1008,7 +1009,6 @@ def fit_transform(self, X, y=None): The missing indicator for input data. The data type of `Xt` will be boolean. """ - self._validate_params() imputer_mask = self._fit(X, y) if self.features_.size < self._n_features: diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index 41ed19b7a8948..4dd3f861d3c96 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -7,6 +7,7 @@ import numpy as np from ..base import clone +from ..base import _fit_context from ..exceptions import ConvergenceWarning from ..preprocessing import normalize from ..utils import ( @@ -681,6 +682,10 @@ def _validate_limit(limit, limit_type, n_features): ) return limit + @_fit_context( + # IterativeImputer.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit_transform(self, X, y=None): """Fit the imputer on `X` and return the transformed `X`. @@ -698,7 +703,6 @@ def fit_transform(self, X, y=None): Xt : array-like, shape (n_samples, n_features) The imputed input data. 
""" - self._validate_params() self.random_state_ = getattr( self, "random_state_", check_random_state(self.random_state) ) diff --git a/sklearn/impute/_knn.py b/sklearn/impute/_knn.py index 5735709dd7f29..915f8cbdb3fcb 100644 --- a/sklearn/impute/_knn.py +++ b/sklearn/impute/_knn.py @@ -6,6 +6,7 @@ import numpy as np from ._base import _BaseImputer +from ..base import _fit_context from ..utils.validation import FLOAT_DTYPES from ..metrics import pairwise_distances_chunked from ..metrics.pairwise import _NAN_METRICS @@ -199,6 +200,7 @@ def _calc_impute(self, dist_pot_donors, n_neighbors, fit_X_col, mask_fit_X_col): return np.ma.average(donors, axis=1, weights=weight_matrix).data + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the imputer on X. @@ -216,7 +218,6 @@ def fit(self, X, y=None): self : object The fitted `KNNImputer` class instance. """ - self._validate_params() # Check data integrity and calling arguments if not is_scalar_nan(self.missing_values): force_all_finite = True diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index aa1521ab697d0..a1cf95b95591b 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -11,6 +11,7 @@ import math from .base import BaseEstimator, TransformerMixin, RegressorMixin +from .base import _fit_context from .utils import check_array, check_consistent_length from .utils.validation import _check_sample_weight, check_is_fitted from .utils._param_validation import Interval, StrOptions @@ -310,6 +311,7 @@ def _build_y(self, X, y, sample_weight, trim_duplicates=True): # prediction speed). return X, y + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the model using X, y as training data. @@ -338,7 +340,6 @@ def fit(self, X, y, sample_weight=None): X is stored for future use, as :meth:`transform` needs X to interpolate new input data. 
""" - self._validate_params() check_params = dict(accept_sparse=False, ensure_2d=False) X = check_array( X, input_name="X", dtype=[np.float64, np.float32], **check_params diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index faa098e634937..7f190a2b66823 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -23,6 +23,7 @@ from .base import BaseEstimator from .base import TransformerMixin from .base import ClassNamePrefixFeaturesOutMixin +from .base import _fit_context from .utils import check_random_state from .utils import deprecated from .utils.extmath import safe_sparse_dot @@ -139,6 +140,7 @@ def __init__( self.n_components = n_components self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model with X. @@ -160,8 +162,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() - X = self._validate_data(X, accept_sparse="csc") random_state = check_random_state(self.random_state) @@ -338,6 +338,7 @@ def __init__(self, *, gamma=1.0, n_components=100, random_state=None): self.n_components = n_components self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model with X. @@ -358,8 +359,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() - X = self._validate_data(X, accept_sparse="csr") random_state = check_random_state(self.random_state) n_features = X.shape[1] @@ -498,6 +497,7 @@ def __init__(self, *, skewedness=1.0, n_components=100, random_state=None): self.n_components = n_components self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model with X. @@ -518,7 +518,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. 
""" - self._validate_params() X = self._validate_data(X) random_state = check_random_state(self.random_state) n_features = X.shape[1] @@ -665,6 +664,7 @@ def __init__(self, *, sample_steps=2, sample_interval=None): self.sample_steps = sample_steps self.sample_interval = sample_interval + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Only validates estimator's parameters. @@ -686,7 +686,6 @@ def fit(self, X, y=None): self : object Returns the transformer. """ - self._validate_params() X = self._validate_data(X, accept_sparse="csr") check_non_negative(X, "X in AdditiveChi2Sampler.fit") @@ -1011,6 +1010,7 @@ def __init__( self.random_state = random_state self.n_jobs = n_jobs + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit estimator to data. @@ -1032,7 +1032,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. """ - self._validate_params() X = self._validate_data(X, accept_sparse="csr") rnd = check_random_state(self.random_state) n_samples = X.shape[0] diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py index 111e62938f096..a7bfeefaef651 100644 --- a/sklearn/kernel_ridge.py +++ b/sklearn/kernel_ridge.py @@ -8,6 +8,7 @@ import numpy as np from .base import BaseEstimator, RegressorMixin, MultiOutputMixin +from .base import _fit_context from .utils._param_validation import Interval, StrOptions from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels from .linear_model._ridge import _solve_cholesky_kernel @@ -170,6 +171,7 @@ def _get_kernel(self, X, Y=None): def _more_tags(self): return {"pairwise": self.kernel == "precomputed"} + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit Kernel Ridge regression model. @@ -190,8 +192,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns the instance itself. 
""" - self._validate_params() - # Convert data X, y = self._validate_data( X, y, accept_sparse=("csr", "csc"), multi_output=True, y_numeric=True diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 06d8664dc013b..92c067c850225 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -28,6 +28,7 @@ from numbers import Integral from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin +from ..base import _fit_context from ..preprocessing._data import _is_constant_feature from ..utils import check_array from ..utils.validation import FLOAT_DTYPES @@ -642,6 +643,7 @@ def __init__( self.n_jobs = n_jobs self.positive = positive + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """ Fit linear model. @@ -665,9 +667,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted Estimator. """ - - self._validate_params() - n_jobs_ = self.n_jobs accept_sparse = False if self.positive else ["csr", "csc", "coo"] diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 887c6a3ebcbbc..37dc3b81511f5 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -13,6 +13,7 @@ from ._base import LinearModel, _preprocess_data, _rescale_data from ..base import RegressorMixin +from ..base import _fit_context from ..utils.extmath import fast_logdet from scipy.linalg import pinvh from ..utils.validation import _check_sample_weight @@ -267,6 +268,7 @@ def __init__( self.verbose = verbose self.n_iter = n_iter + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the model. @@ -288,8 +290,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns the instance itself. 
""" - self._validate_params() - max_iter = _deprecate_n_iter(self.n_iter, self.max_iter) X, y = self._validate_data(X, y, dtype=[np.float64, np.float32], y_numeric=True) @@ -665,6 +665,7 @@ def __init__( self.verbose = verbose self.n_iter = n_iter + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model according to the given training data and parameters. @@ -683,9 +684,6 @@ def fit(self, X, y): self : object Fitted estimator. """ - - self._validate_params() - max_iter = _deprecate_n_iter(self.n_iter, self.max_iter) X, y = self._validate_data( diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index ea1ee3115ea93..829c0ab6149f1 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -18,6 +18,7 @@ from ._base import LinearModel, _pre_fit from ..base import RegressorMixin, MultiOutputMixin +from ..base import _fit_context from ._base import _preprocess_data from ..utils import check_array, check_scalar from ..utils.validation import check_random_state @@ -851,6 +852,7 @@ def __init__( self.random_state = random_state self.selection = selection + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None, check_input=True): """Fit model with coordinate descent. @@ -886,8 +888,6 @@ def fit(self, X, y, sample_weight=None, check_input=True): To avoid memory re-allocation it is advised to allocate the initial data in memory directly using that format. """ - self._validate_params() - if self.alpha == 0: warnings.warn( ( @@ -1475,6 +1475,7 @@ def _is_multitask(self): def path(X, y, **kwargs): """Compute path with coordinate descent.""" + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit linear model with coordinate descent. @@ -1502,9 +1503,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns an instance of fitted model. 
""" - - self._validate_params() - # This makes sure that there is no duplication in memory. # Dealing right with copy_X is important in the following: # Multiple functions touch X and subsamples of X and can induce a @@ -2343,6 +2341,7 @@ def __init__( self.random_state = random_state self.selection = selection + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit MultiTaskElasticNet model with coordinate descent. @@ -2367,8 +2366,6 @@ def fit(self, X, y): To avoid memory re-allocation it is advised to allocate the initial data in memory directly using that format. """ - self._validate_params() - # Need to validate separately here. # We can't pass multi_output=True because that would allow y to be csr. check_X_params = dict( diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index caf37a0f473e0..b1bc460f24dff 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -20,6 +20,7 @@ HalfTweedieLossIdentity, ) from ...base import BaseEstimator, RegressorMixin +from ...base import _fit_context from ...utils import check_array from ...utils._openmp_helpers import _openmp_effective_n_threads from ...utils._param_validation import Hidden, Interval, StrOptions @@ -168,6 +169,7 @@ def __init__( self.warm_start = warm_start self.verbose = verbose + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit a Generalized Linear Model. @@ -187,8 +189,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted model. 
""" - self._validate_params() - X, y = self._validate_data( X, y, diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py index a7b848f647560..def2ae273d5c4 100644 --- a/sklearn/linear_model/_huber.py +++ b/sklearn/linear_model/_huber.py @@ -7,6 +7,7 @@ from scipy import optimize from ..base import BaseEstimator, RegressorMixin +from ..base import _fit_context from ._base import LinearModel from ..utils import axis0_safe_slice from ..utils._param_validation import Interval @@ -273,6 +274,7 @@ def __init__( self.fit_intercept = fit_intercept self.tol = tol + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the model according to the given training data. @@ -293,7 +295,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted `HuberRegressor` estimator. """ - self._validate_params() X, y = self._validate_data( X, y, diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index 4be8bb730a0ae..e6c653eb80bb3 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -20,6 +20,7 @@ from ._base import LinearModel, LinearRegression from ._base import _deprecate_normalize, _preprocess_data from ..base import RegressorMixin, MultiOutputMixin +from ..base import _fit_context # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs' from ..utils import arrayfuncs, as_float_array # type: ignore @@ -1097,6 +1098,7 @@ def _fit(self, X, y, max_iter, alpha, fit_path, normalize, Xy=None): self._set_intercept(X_offset, y_offset, X_scale) return self + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, Xy=None): """Fit the model using X, y as training data. @@ -1118,8 +1120,6 @@ def fit(self, X, y, Xy=None): self : object Returns an instance of self. 
""" - self._validate_params() - X, y = self._validate_data(X, y, y_numeric=True, multi_output=True) _normalize = _deprecate_normalize( @@ -1691,6 +1691,7 @@ def __init__( def _more_tags(self): return {"multioutput": False} + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model using X, y as training data. @@ -1707,8 +1708,6 @@ def fit(self, X, y): self : object Returns an instance of self. """ - self._validate_params() - _normalize = _deprecate_normalize( self.normalize, estimator_name=self.__class__.__name__ ) @@ -2216,6 +2215,7 @@ def __init__( def _more_tags(self): return {"multioutput": False} + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, copy_X=None): """Fit the model using X, y as training data. @@ -2237,8 +2237,6 @@ def fit(self, X, y, copy_X=None): self : object Returns an instance of self. """ - self._validate_params() - _normalize = _deprecate_normalize( self.normalize, estimator_name=self.__class__.__name__ ) diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index a00004ae17676..4d53ae68ad9ef 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -24,6 +24,7 @@ from ._linear_loss import LinearModelLoss from ._sag import sag_solver from ._glm.glm import NewtonCholeskySolver +from ..base import _fit_context from .._loss.loss import HalfBinomialLoss, HalfMultinomialLoss from ..preprocessing import LabelEncoder, LabelBinarizer from ..svm._base import _fit_liblinear @@ -1129,6 +1130,7 @@ def __init__( self.n_jobs = n_jobs self.l1_ratio = l1_ratio + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """ Fit the model according to the given training data. @@ -1158,9 +1160,6 @@ def fit(self, X, y, sample_weight=None): ----- The SAGA solver supports both float64 and float32 bit arrays. 
""" - - self._validate_params() - solver = _check_solver(self.solver, self.penalty, self.dual) if self.penalty != "elasticnet" and self.l1_ratio is not None: @@ -1742,6 +1741,7 @@ def __init__( self.random_state = random_state self.l1_ratios = l1_ratios + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the model according to the given training data. @@ -1763,9 +1763,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted LogisticRegressionCV estimator. """ - - self._validate_params() - solver = _check_solver(self.solver, self.penalty, self.dual) if self.penalty == "elasticnet": diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py index b1dc1e352fd62..df451a99417b0 100644 --- a/sklearn/linear_model/_omp.py +++ b/sklearn/linear_model/_omp.py @@ -15,6 +15,7 @@ from ._base import LinearModel, _pre_fit, _deprecate_normalize from ..base import RegressorMixin, MultiOutputMixin +from ..base import _fit_context from ..utils import as_float_array, check_array from ..utils.parallel import delayed, Parallel from ..utils._param_validation import Hidden, Interval, StrOptions @@ -725,6 +726,7 @@ def __init__( self.normalize = normalize self.precompute = precompute + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model using X, y as training data. @@ -741,8 +743,6 @@ def fit(self, X, y): self : object Returns an instance of self. """ - self._validate_params() - _normalize = _deprecate_normalize( self.normalize, estimator_name=self.__class__.__name__ ) @@ -1042,6 +1042,7 @@ def __init__( self.n_jobs = n_jobs self.verbose = verbose + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model using X, y as training data. @@ -1058,8 +1059,6 @@ def fit(self, X, y): self : object Returns an instance of self. 
""" - self._validate_params() - _normalize = _deprecate_normalize( self.normalize, estimator_name=self.__class__.__name__ ) diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py index 2cacd4f78cc54..a9c81799c8ca3 100644 --- a/sklearn/linear_model/_passive_aggressive.py +++ b/sklearn/linear_model/_passive_aggressive.py @@ -5,6 +5,7 @@ from ._stochastic_gradient import BaseSGDClassifier from ._stochastic_gradient import BaseSGDRegressor from ._stochastic_gradient import DEFAULT_EPSILON +from ..base import _fit_context from ..utils._param_validation import Interval, StrOptions @@ -220,6 +221,7 @@ def __init__( self.C = C self.loss = loss + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y, classes=None): """Fit linear model with Passive Aggressive algorithm. @@ -245,7 +247,6 @@ def partial_fit(self, X, y, classes=None): Fitted estimator. """ if not hasattr(self, "classes_"): - self._validate_params() self._more_validate_params(for_partial_fit=True) if self.class_weight == "balanced": @@ -276,6 +277,7 @@ def partial_fit(self, X, y, classes=None): intercept_init=None, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, coef_init=None, intercept_init=None): """Fit linear model with Passive Aggressive algorithm. @@ -298,7 +300,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None): self : object Fitted estimator. """ - self._validate_params() self._more_validate_params() lr = "pa1" if self.loss == "hinge" else "pa2" @@ -504,6 +505,7 @@ def __init__( self.C = C self.loss = loss + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y): """Fit linear model with Passive Aggressive algorithm. @@ -521,7 +523,6 @@ def partial_fit(self, X, y): Fitted estimator. 
""" if not hasattr(self, "coef_"): - self._validate_params() self._more_validate_params(for_partial_fit=True) lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2" @@ -538,6 +539,7 @@ def partial_fit(self, X, y): intercept_init=None, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, coef_init=None, intercept_init=None): """Fit linear model with Passive Aggressive algorithm. @@ -560,7 +562,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None): self : object Fitted estimator. """ - self._validate_params() self._more_validate_params() lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2" diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py index 081e3da5b51b7..b4a5581386a5f 100644 --- a/sklearn/linear_model/_quantile.py +++ b/sklearn/linear_model/_quantile.py @@ -9,6 +9,7 @@ from scipy.optimize import linprog from ..base import BaseEstimator, RegressorMixin +from ..base import _fit_context from ._base import LinearModel from ..exceptions import ConvergenceWarning from ..utils import _safe_indexing @@ -141,6 +142,7 @@ def __init__( self.solver = solver self.solver_options = solver_options + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the model according to the given training data. @@ -160,7 +162,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns self. 
""" - self._validate_params() X, y = self._validate_data( X, y, diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index 0e11a47b442a6..1e17a65b4ede3 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -9,6 +9,7 @@ from ..base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone from ..base import MultiOutputMixin +from ..base import _fit_context from ..utils import check_random_state, check_consistent_length from ..utils.random import sample_without_replacement from ..utils.validation import check_is_fitted, _check_sample_weight @@ -297,6 +298,10 @@ def __init__( self.loss = loss self.base_estimator = base_estimator + @_fit_context( + # RansacRegressor.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None): """Fit estimator using RANSAC algorithm. @@ -327,8 +332,6 @@ def fit(self, X, y, sample_weight=None): `is_data_valid` and `is_model_valid` return False for all `max_trials` randomly chosen sub-samples. """ - self._validate_params() - # Need to validate separately here. We can't pass multi_output=True # because that would allow y to be csr. Delay expensive finiteness # check to the estimator's own input validation. diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 28ef7cbd43eb7..893b10d1d93ae 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -25,6 +25,7 @@ from ._base import _preprocess_data, _rescale_data from ._sag import sag_solver from ..base import MultiOutputMixin, RegressorMixin, is_classifier +from ..base import _fit_context from ..utils.extmath import safe_sparse_dot from ..utils.extmath import row_norms from ..utils import check_array @@ -1114,6 +1115,7 @@ def __init__( random_state=random_state, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit Ridge regression model. 
@@ -1134,8 +1136,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. """ - self._validate_params() - _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), self.solver) X, y = self._validate_data( X, @@ -1423,6 +1423,7 @@ def __init__( ) self.class_weight = class_weight + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit Ridge classifier model. @@ -1446,8 +1447,6 @@ def fit(self, X, y, sample_weight=None): self : object Instance of the estimator. """ - self._validate_params() - X, y, sample_weight, Y = self._prepare_data(X, y, sample_weight, self.solver) super().fit(X, Y, sample_weight=sample_weight) @@ -2354,6 +2353,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): 0.5166... """ + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit Ridge regression model with cv. @@ -2383,8 +2383,6 @@ def fit(self, X, y, sample_weight=None): cross-validation takes the sample weights into account when computing the validation score. """ - self._validate_params() - super().fit(X, y, sample_weight=sample_weight) return self @@ -2533,6 +2531,7 @@ def __init__( ) self.class_weight = class_weight + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit Ridge classifier with cv. @@ -2555,8 +2554,6 @@ def fit(self, X, y, sample_weight=None): self : object Fitted estimator. """ - self._validate_params() - # `RidgeClassifier` does not accept "sag" or "saga" solver and thus support # csr, csc, and coo sparse matrices. By using solver="eigen" we force to accept # all sparse format. 
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index 2f27bdee7968b..bc8f31016c6f8 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -13,6 +13,7 @@ from numbers import Integral, Real from ..base import clone, is_classifier +from ..base import _fit_context from ._base import LinearClassifierMixin, SparseCoefMixin from ._base import make_dataset from ..base import BaseEstimator, RegressorMixin, OutlierMixin @@ -805,6 +806,7 @@ def _fit_multiclass(self, X, y, alpha, C, learning_rate, sample_weight, max_iter self._standard_intercept = np.atleast_1d(self.intercept_) self.intercept_ = self._standard_intercept + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y, classes=None, sample_weight=None): """Perform one epoch of stochastic gradient descent on given samples. @@ -839,7 +841,6 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): Returns an instance of self. """ if not hasattr(self, "classes_"): - self._validate_params() self._more_validate_params(for_partial_fit=True) if self.class_weight == "balanced": @@ -869,6 +870,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): intercept_init=None, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None): """Fit linear model with Stochastic Gradient Descent. @@ -897,7 +899,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None): self : object Returns an instance of self. """ - self._validate_params() self._more_validate_params() return self._fit( @@ -1470,6 +1471,7 @@ def _partial_fit( return self + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y, sample_weight=None): """Perform one epoch of stochastic gradient descent on given samples. 
@@ -1496,7 +1498,6 @@ def partial_fit(self, X, y, sample_weight=None): Returns an instance of self. """ if not hasattr(self, "coef_"): - self._validate_params() self._more_validate_params(for_partial_fit=True) return self._partial_fit( @@ -1565,6 +1566,7 @@ def _fit( return self + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None): """Fit linear model with Stochastic Gradient Descent. @@ -1590,7 +1592,6 @@ def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None): self : object Fitted `SGDRegressor` estimator. """ - self._validate_params() self._more_validate_params() return self._fit( @@ -2366,6 +2367,7 @@ def _partial_fit( return self + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None, sample_weight=None): """Fit linear One-Class SVM with Stochastic Gradient Descent. @@ -2386,7 +2388,6 @@ def partial_fit(self, X, y=None, sample_weight=None): Returns a fitted instance of self. """ if not hasattr(self, "coef_"): - self._validate_params() self._more_validate_params(for_partial_fit=True) alpha = self.nu / 2 @@ -2453,6 +2454,7 @@ def _fit( return self + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, coef_init=None, offset_init=None, sample_weight=None): """Fit linear One-Class SVM with Stochastic Gradient Descent. @@ -2485,7 +2487,6 @@ def fit(self, X, y=None, coef_init=None, offset_init=None, sample_weight=None): self : object Returns a fitted instance of self. 
""" - self._validate_params() self._more_validate_params() alpha = self.nu / 2 diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index 67d6ca532a8ab..72c2d897681c4 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -19,6 +19,7 @@ from ._base import LinearModel from ..base import RegressorMixin +from ..base import _fit_context from ..utils import check_random_state from ..utils._param_validation import Interval from ..utils.parallel import delayed, Parallel @@ -395,6 +396,7 @@ def _check_subparams(self, n_samples, n_features): return n_subsamples, n_subpopulation + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit linear model. @@ -410,7 +412,6 @@ def fit(self, X, y): self : returns an instance of self. Fitted `TheilSenRegressor` estimator. """ - self._validate_params() random_state = check_random_state(self.random_state) X, y = self._validate_data(X, y, y_numeric=True) n_samples, n_features = X.shape diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py index 92206721aac15..3b86b643129d9 100644 --- a/sklearn/manifold/_isomap.py +++ b/sklearn/manifold/_isomap.py @@ -12,6 +12,7 @@ from scipy.sparse.csgraph import connected_components from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..neighbors import NearestNeighbors, kneighbors_graph from ..neighbors import radius_neighbors_graph from ..utils.validation import check_is_fitted @@ -332,6 +333,10 @@ def reconstruction_error(self): evals = self.kernel_pca_.eigenvalues_ return np.sqrt(np.sum(G_center**2) - np.sum(evals**2)) / G.shape[0] + @_fit_context( + # Isomap.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Compute the embedding vectors for data X. @@ -350,10 +355,13 @@ def fit(self, X, y=None): self : object Returns a fitted instance of self. 
""" - self._validate_params() self._fit_transform(X) return self + @_fit_context( + # Isomap.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit_transform(self, X, y=None): """Fit the model from data in X and transform X. @@ -371,7 +379,6 @@ def fit_transform(self, X, y=None): X_new : array-like, shape (n_samples, n_components) X transformed in the new space. """ - self._validate_params() self._fit_transform(X) return self.embedding_ diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 10a22b12dfd1d..6f57b0627b8be 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -17,6 +17,7 @@ TransformerMixin, _UnstableArchMixin, ClassNamePrefixFeaturesOutMixin, + _fit_context, ) from ..utils import check_random_state, check_array from ..utils._arpack import _init_arpack_v0 @@ -759,6 +760,7 @@ def _fit_transform(self, X): ) self._n_features_out = self.embedding_.shape[1] + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Compute the embedding vectors for data X. @@ -775,10 +777,10 @@ def fit(self, X, y=None): self : object Fitted `LocallyLinearEmbedding` class instance. """ - self._validate_params() self._fit_transform(X) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Compute the embedding vectors for data X and transform X. @@ -795,7 +797,6 @@ def fit_transform(self, X, y=None): X_new : array-like, shape (n_samples, n_components) Returns the instance itself. 
""" - self._validate_params() self._fit_transform(X) return self.embedding_ diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py index 7fc46325a1ae1..6b7a818b94ea8 100644 --- a/sklearn/manifold/_mds.py +++ b/sklearn/manifold/_mds.py @@ -13,6 +13,7 @@ import warnings from ..base import BaseEstimator +from ..base import _fit_context from ..metrics import euclidean_distances from ..utils import check_random_state, check_array, check_symmetric from ..isotonic import IsotonicRegression @@ -569,10 +570,10 @@ def fit(self, X, y=None, init=None): self : object Fitted estimator. """ - # parameter will be validated in `fit_transform` call self.fit_transform(X, init=init) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None, init=None): """ Fit the data from `X`, and returns the embedded coordinates. @@ -597,7 +598,6 @@ def fit_transform(self, X, y=None, init=None): X_new : ndarray of shape (n_samples, n_components) X transformed in the new space. """ - self._validate_params() X = self._validate_data(X) if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed": warnings.warn( diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index 8291d8326eb05..af965a1362b8f 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -17,6 +17,7 @@ from scipy.sparse.csgraph import laplacian as csgraph_laplacian from ..base import BaseEstimator +from ..base import _fit_context from ..utils import ( check_array, check_random_state, @@ -652,6 +653,7 @@ def _get_affinity_matrix(self, X, Y=None): self.affinity_matrix_ = self.affinity(X) return self.affinity_matrix_ + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model from data in X. @@ -674,8 +676,6 @@ def fit(self, X, y=None): self : object Returns the instance itself. 
""" - self._validate_params() - X = self._validate_data(X, accept_sparse="csr", ensure_min_samples=2) random_state = check_random_state(self.random_state) diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py index fb66651242d61..61fc036b9c25a 100644 --- a/sklearn/manifold/_t_sne.py +++ b/sklearn/manifold/_t_sne.py @@ -18,6 +18,7 @@ from numbers import Integral, Real from ..neighbors import NearestNeighbors from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin +from ..base import _fit_context from ..utils import check_random_state from ..utils._openmp_helpers import _openmp_effective_n_threads from ..utils.validation import check_non_negative @@ -1097,6 +1098,10 @@ def _tsne( return X_embedded + @_fit_context( + # TSNE.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit_transform(self, X, y=None): """Fit X into an embedded space and return that transformed output. @@ -1118,12 +1123,15 @@ def fit_transform(self, X, y=None): X_new : ndarray of shape (n_samples, n_components) Embedding of the training data in low-dimensional space. """ - self._validate_params() self._check_params_vs_input(X) embedding = self._fit(X) self.embedding_ = embedding return self.embedding_ + @_fit_context( + # TSNE.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Fit X into an embedded space. @@ -1145,7 +1153,6 @@ def fit(self, X, y=None): X_new : array of shape (n_samples, n_components) Embedding of the training data in low-dimensional space. 
""" - self._validate_params() self.fit_transform(X) return self diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index a298dfec6a0da..fbca4f1d49dcd 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -16,6 +16,7 @@ from ..cluster import kmeans_plusplus from ..base import BaseEstimator from ..base import DensityMixin +from ..base import _fit_context from ..exceptions import ConvergenceWarning from ..utils import check_random_state from ..utils.validation import check_is_fitted @@ -182,6 +183,7 @@ def fit(self, X, y=None): self.fit_predict(X, y) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_predict(self, X, y=None): """Estimate model parameters using X and predict the labels for X. @@ -209,8 +211,6 @@ def fit_predict(self, X, y=None): labels : array, shape (n_samples,) Component labels. """ - self._validate_params() - X = self._validate_data(X, dtype=[np.float64, np.float32], ensure_min_samples=2) if X.shape[0] < self.n_components: raise ValueError( diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 1621dd324f81c..695614f4e1fa0 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -26,6 +26,7 @@ from ..base import BaseEstimator, is_classifier, clone from ..base import MetaEstimatorMixin +from ..base import _fit_context from ._split import check_cv from ._validation import _fit_and_score from ._validation import _aggregate_score_dicts @@ -753,6 +754,10 @@ def _select_best_index(refit, refit_metric, results): best_index = results[f"rank_test_{refit_metric}"].argmin() return best_index + @_fit_context( + # *SearchCV.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None, *, groups=None, **fit_params): """Run fit with all sets of parameters. @@ -786,7 +791,6 @@ def fit(self, X, y=None, *, groups=None, **fit_params): self : object Instance of fitted estimator. 
""" - self._validate_params() estimator = self.estimator refit_metric = "score" diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 4826e7931d4d6..a061d7283b46d 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -7,6 +7,7 @@ from ._search import BaseSearchCV from . import ParameterGrid, ParameterSampler from ..base import is_classifier +from ..base import _fit_context from ._split import check_cv, _yields_constant_splits from ..metrics._scorer import get_scorer_names from ..utils import resample @@ -211,6 +212,10 @@ def _select_best_index(refit, refit_metric, results): return last_iter_indices[best_idx] + @_fit_context( + # Halving*SearchCV.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None, groups=None, **fit_params): """Run fit with all sets of parameters. @@ -238,7 +243,6 @@ def fit(self, X, y=None, groups=None, **fit_params): self : object Instance of fitted estimator. """ - self._validate_params() self._checked_cv_orig = check_cv( self.cv, y, classifier=is_classifier(self.estimator) ) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 74684e608d3c1..4c30bcdb6cac3 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -43,6 +43,7 @@ from .base import BaseEstimator, ClassifierMixin, clone, is_classifier from .base import MultiOutputMixin from .base import MetaEstimatorMixin, is_regressor +from .base import _fit_context from .preprocessing import LabelBinarizer from .metrics.pairwise import pairwise_distances_argmin from .utils import check_random_state @@ -296,6 +297,10 @@ def __init__(self, estimator, *, n_jobs=None, verbose=0): self.n_jobs = n_jobs self.verbose = verbose + @_fit_context( + # OneVsRestClassifier.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit underlying estimators. 
@@ -313,8 +318,6 @@ def fit(self, X, y): self : object Instance of fitted estimator. """ - self._validate_params() - # A sparse LabelBinarizer, with sparse_output=True, has been shown to # outperform or match a dense label binarizer in all cases and has also # resulted in less or equal memory consumption in the fit_ovr function @@ -348,6 +351,10 @@ def fit(self, X, y): return self @available_if(_estimators_has("partial_fit")) + @_fit_context( + # OneVsRestClassifier.estimator is not validated yet + prefer_skip_nested_validation=False + ) def partial_fit(self, X, y, classes=None): """Partially fit underlying estimators. @@ -376,8 +383,6 @@ def partial_fit(self, X, y, classes=None): Instance of partially fitted estimator. """ if _check_partial_fit_first_call(self, classes): - self._validate_params() - if not hasattr(self.estimator, "partial_fit"): raise ValueError( ("Base estimator {0}, doesn't have partial_fit method").format( @@ -655,6 +660,10 @@ def __init__(self, estimator, *, n_jobs=None): self.estimator = estimator self.n_jobs = n_jobs + @_fit_context( + # OneVsOneClassifier.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit underlying estimators. @@ -671,7 +680,6 @@ def fit(self, X, y): self : object The fitted underlying estimator. """ - self._validate_params() # We need to validate the data because we do a safe_indexing later. X, y = self._validate_data( X, y, accept_sparse=["csr", "csc"], force_all_finite=False @@ -706,6 +714,10 @@ def fit(self, X, y): return self @available_if(_estimators_has("partial_fit")) + @_fit_context( + # OneVsOneClassifier.estimator is not validated yet + prefer_skip_nested_validation=False + ) def partial_fit(self, X, y, classes=None): """Partially fit underlying estimators. 
@@ -735,8 +747,6 @@ def partial_fit(self, X, y, classes=None): """ first_call = _check_partial_fit_first_call(self, classes) if first_call: - self._validate_params() - self.estimators_ = [ clone(self.estimator) for _ in range(self.n_classes_ * (self.n_classes_ - 1) // 2) @@ -968,6 +978,10 @@ def __init__(self, estimator, *, code_size=1.5, random_state=None, n_jobs=None): self.random_state = random_state self.n_jobs = n_jobs + @_fit_context( + # OutputCodeClassifier.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit underlying estimators. @@ -984,7 +998,6 @@ def fit(self, X, y): self : object Returns a fitted instance of self. """ - self._validate_params() y = self._validate_data(X="no_validation", y=y) random_state = check_random_state(self.random_state) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 90c1f04f7e46a..8bb954e976f4c 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -28,6 +28,7 @@ RegressorMixin, clone, is_classifier, + _fit_context, ) from .model_selection import cross_val_predict from .utils import _print_elapsed_time, check_random_state, Bunch @@ -104,6 +105,10 @@ def __init__(self, estimator, *, n_jobs=None): self.n_jobs = n_jobs @_available_if_estimator_has("partial_fit") + @_fit_context( + # MultiOutput*.estimator is not validated yet + prefer_skip_nested_validation=False + ) def partial_fit(self, X, y, classes=None, sample_weight=None, **partial_fit_params): """Incrementally fit a separate model for each class output. 
@@ -151,9 +156,6 @@ def partial_fit(self, X, y, classes=None, sample_weight=None, **partial_fit_para first_time = not hasattr(self, "estimators_") - if first_time: - self._validate_params() - y = self._validate_data(X="no_validation", y=y, multi_output=True) if y.ndim == 1: @@ -203,6 +205,10 @@ def partial_fit(self, X, y, classes=None, sample_weight=None, **partial_fit_para return self + @_fit_context( + # MultiOutput*.estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y, sample_weight=None, **fit_params): """Fit the model to data, separately for each output variable. @@ -230,8 +236,6 @@ def fit(self, X, y, sample_weight=None, **fit_params): self : object Returns a fitted instance. """ - self._validate_params() - if not hasattr(self.estimator, "fit"): raise ValueError("The base estimator should implement a fit method") @@ -887,6 +891,10 @@ class labels for each estimator in the chain. [0.0321..., 0.9935..., 0.0625...]]) """ + @_fit_context( + # ClassifierChain.base_estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, Y, **fit_params): """Fit the model to data matrix X and targets Y. @@ -917,8 +925,6 @@ def fit(self, X, Y, **fit_params): "See the User Guide for more information." ) - self._validate_params() - super().fit(X, Y, **fit_params) self.classes_ = [ estimator.classes_ for chain_idx, estimator in enumerate(self.estimators_) @@ -1109,6 +1115,10 @@ class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain): [2., 0.]]) """ + @_fit_context( + # RegressorChain.base_estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, Y, **fit_params): """Fit the model to data matrix X and targets Y. @@ -1131,8 +1141,6 @@ def fit(self, X, Y, **fit_params): self : object Returns a fitted instance. 
""" - self._validate_params() - super().fit(X, Y, **fit_params) return self diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 20858ac8b5577..76d7189385828 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -22,6 +22,7 @@ from scipy.special import logsumexp from .base import BaseEstimator, ClassifierMixin +from .base import _fit_context from .preprocessing import binarize from .preprocessing import LabelBinarizer from .preprocessing import label_binarize @@ -239,6 +240,7 @@ def __init__(self, *, priors=None, var_smoothing=1e-9): self.priors = priors self.var_smoothing = var_smoothing + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit Gaussian Naive Bayes according to X, y. @@ -262,7 +264,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns the instance itself. """ - self._validate_params() y = self._validate_data(y=y) return self._partial_fit( X, y, np.unique(y), _refit=True, sample_weight=sample_weight @@ -346,6 +347,7 @@ def _update_mean_variance(n_past, mu, var, X, sample_weight=None): return total_mu, total_var + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y, classes=None, sample_weight=None): """Incremental fit on a batch of samples. @@ -386,8 +388,6 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): self : object Returns the instance itself. """ - self._validate_params() - return self._partial_fit( X, y, classes, _refit=False, sample_weight=sample_weight ) @@ -643,6 +643,7 @@ def _check_alpha(self): return np.maximum(alpha, alpha_lower_bound) return alpha + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y, classes=None, sample_weight=None): """Incremental fit on a batch of samples. 
@@ -682,9 +683,6 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): """ first_call = not hasattr(self, "classes_") - if first_call: - self._validate_params() - X, y = self._check_X_y(X, y, reset=first_call) _, n_features = X.shape @@ -728,6 +726,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): self._update_class_log_prior(class_prior=class_prior) return self + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit Naive Bayes classifier according to X, y. @@ -748,7 +747,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns the instance itself. """ - self._validate_params() X, y = self._check_X_y(X, y) _, n_features = X.shape diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index dbc070987d5d0..e3e2049a8f8e5 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -18,6 +18,7 @@ from ._base import _get_weights from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin from ..base import ClassifierMixin +from ..base import _fit_context from ..metrics._pairwise_distances_reduction import ArgKminClassMode from ..utils._param_validation import StrOptions from sklearn.neighbors._base import _check_precomputed @@ -203,6 +204,10 @@ def __init__( ) self.weights = weights + @_fit_context( + # KNeighborsClassifier.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit the k-nearest neighbors classifier from the training dataset. @@ -221,8 +226,6 @@ def fit(self, X, y): self : KNeighborsClassifier The fitted k-nearest neighbors classifier. 
""" - self._validate_params() - return self._fit(X, y) def predict(self, X): @@ -572,6 +575,10 @@ def __init__( self.weights = weights self.outlier_label = outlier_label + @_fit_context( + # RadiusNeighborsClassifier.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit the radius neighbors classifier from the training dataset. @@ -590,7 +597,6 @@ def fit(self, X, y): self : RadiusNeighborsClassifier The fitted radius neighbors classifier. """ - self._validate_params() self._fit(X, y) classes_ = self.classes_ diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index 418761c2d21ee..e815d12e293c9 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -8,6 +8,7 @@ from ._base import NeighborsBase from ._unsupervised import NearestNeighbors from ..base import TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..utils._param_validation import StrOptions from ..utils.validation import check_is_fitted @@ -372,6 +373,10 @@ def __init__( ) self.mode = mode + @_fit_context( + # KNeighborsTransformer.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Fit the k-nearest neighbors transformer from the training dataset. @@ -388,7 +393,6 @@ def fit(self, X, y=None): self : KNeighborsTransformer The fitted k-nearest neighbors transformer. """ - self._validate_params() self._fit(X) self._n_features_out = self.n_samples_fit_ return self @@ -600,6 +604,10 @@ def __init__( ) self.mode = mode + @_fit_context( + # RadiusNeighborsTransformer.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Fit the radius neighbors transformer from the training dataset. @@ -617,7 +625,6 @@ def fit(self, X, y=None): self : RadiusNeighborsTransformer The fitted radius neighbors transformer. 
""" - self._validate_params() self._fit(X) self._n_features_out = self.n_samples_fit_ return self diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index f285b03403b5f..7f7b38497d209 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -10,6 +10,7 @@ from scipy.special import gammainc from ..base import BaseEstimator +from ..base import _fit_context from ..neighbors._base import VALID_METRICS from ..utils import check_random_state from ..utils.validation import _check_sample_weight, check_is_fitted @@ -185,6 +186,10 @@ def _choose_algorithm(self, algorithm, metric): ) return algorithm + @_fit_context( + # KernelDensity.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None, sample_weight=None): """Fit the Kernel Density model on the data. @@ -208,8 +213,6 @@ def fit(self, X, y=None, sample_weight=None): self : object Returns the instance itself. """ - self._validate_params() - algorithm = self._choose_algorithm(self.algorithm, self.metric) if isinstance(self.bandwidth, str): diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py index 90b3b0aa3d8ce..40cdc9ab5fb9d 100644 --- a/sklearn/neighbors/_lof.py +++ b/sklearn/neighbors/_lof.py @@ -8,6 +8,7 @@ from ._base import NeighborsBase from ._base import KNeighborsMixin from ..base import OutlierMixin +from ..base import _fit_context from numbers import Real from ..utils._param_validation import Interval, StrOptions @@ -256,6 +257,10 @@ def fit_predict(self, X, y=None): return self.fit(X)._predict() + @_fit_context( + # LocalOutlierFactor.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Fit the local outlier factor detector from the training dataset. @@ -273,8 +278,6 @@ def fit(self, X, y=None): self : LocalOutlierFactor The fitted local outlier factor detector. 
""" - self._validate_params() - self._fit(X) n_samples = self.n_samples_fit_ diff --git a/sklearn/neighbors/_nca.py b/sklearn/neighbors/_nca.py index 4a83fcc7bc080..246f0adcb36ad 100644 --- a/sklearn/neighbors/_nca.py +++ b/sklearn/neighbors/_nca.py @@ -15,6 +15,7 @@ from ..utils.extmath import softmax from ..metrics import pairwise_distances from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..preprocessing import LabelEncoder from ..decomposition import PCA from ..utils.multiclass import check_classification_targets @@ -215,6 +216,7 @@ def __init__( self.verbose = verbose self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model according to the given training data. @@ -231,8 +233,6 @@ def fit(self, X, y): self : object Fitted estimator. """ - self._validate_params() - # Validate the inputs X and y, and converts y to numerical classes. X, y = self._validate_data(X, y, ensure_min_samples=2) check_classification_targets(y) diff --git a/sklearn/neighbors/_nearest_centroid.py b/sklearn/neighbors/_nearest_centroid.py index 7b9c2479747d3..315393bf597e4 100644 --- a/sklearn/neighbors/_nearest_centroid.py +++ b/sklearn/neighbors/_nearest_centroid.py @@ -13,6 +13,7 @@ from scipy import sparse as sp from ..base import BaseEstimator, ClassifierMixin +from ..base import _fit_context from ..metrics.pairwise import pairwise_distances_argmin from ..preprocessing import LabelEncoder from ..utils.validation import check_is_fitted @@ -122,6 +123,7 @@ def __init__(self, metric="euclidean", *, shrink_threshold=None): self.metric = metric self.shrink_threshold = shrink_threshold + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """ Fit the NearestCentroid model according to the given training data. @@ -140,8 +142,6 @@ def fit(self, X, y): self : object Fitted estimator. 
""" - self._validate_params() - if isinstance(self.metric, str) and self.metric not in ( "manhattan", "euclidean", diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py index 003b534074ecd..b2050345c9833 100644 --- a/sklearn/neighbors/_regression.py +++ b/sklearn/neighbors/_regression.py @@ -17,6 +17,7 @@ from ._base import _get_weights from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin from ..base import RegressorMixin +from ..base import _fit_context from ..utils._param_validation import StrOptions @@ -194,6 +195,10 @@ def _more_tags(self): # For cross-validation routines to split data correctly return {"pairwise": self.metric == "precomputed"} + @_fit_context( + # KNeighborsRegressor.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit the k-nearest neighbors regressor from the training dataset. @@ -212,8 +217,6 @@ def fit(self, X, y): self : KNeighborsRegressor The fitted k-nearest neighbors regressor. """ - self._validate_params() - return self._fit(X, y) def predict(self, X): @@ -422,6 +425,10 @@ def __init__( ) self.weights = weights + @_fit_context( + # RadiusNeighborsRegressor.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """Fit the radius neighbors regressor from the training dataset. @@ -440,7 +447,6 @@ def fit(self, X, y): self : RadiusNeighborsRegressor The fitted radius neighbors regressor. 
""" - self._validate_params() return self._fit(X, y) def predict(self, X): diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index 53e69495b9ed4..05607f0bd0c71 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -1,4 +1,5 @@ """Unsupervised nearest neighbors learner""" +from ..base import _fit_context from ._base import NeighborsBase from ._base import KNeighborsMixin from ._base import RadiusNeighborsMixin @@ -155,6 +156,10 @@ def __init__( n_jobs=n_jobs, ) + @_fit_context( + # NearestNeighbors.metric is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None): """Fit the nearest neighbors estimator from the training dataset. @@ -172,5 +177,4 @@ def fit(self, X, y=None): self : NearestNeighbors The fitted nearest neighbors estimator. """ - self._validate_params() return self._fit(X) diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index 5c4bc5a39aa2d..fb8eab2f1776d 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -21,6 +21,7 @@ RegressorMixin, ) from ..base import is_classifier +from ..base import _fit_context from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer from ..metrics import accuracy_score, r2_score @@ -727,6 +728,7 @@ def _update_no_improvement_count(self, early_stopping, X_val, y_val): if self.loss_curve_[-1] < self.best_loss_: self.best_loss_ = self.loss_curve_[-1] + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model to data matrix X and target(s) y. @@ -744,8 +746,6 @@ def fit(self, X, y): self : object Returns a trained MLP model. 
""" - self._validate_params() - return self._fit(X, y, incremental=False) def _check_solver(self): @@ -1170,6 +1170,7 @@ def _score(self, X, y): return accuracy_score(y, self._predict(X, check_input=False)) @available_if(lambda est: est._check_solver()) + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y, classes=None): """Update the model with a single iteration over the given data. @@ -1194,9 +1195,6 @@ def partial_fit(self, X, y, classes=None): self : object Trained MLP model. """ - if not hasattr(self, "coefs_"): - self._validate_params() - if _check_partial_fit_first_call(self, classes): self._label_binarizer = LabelBinarizer() if type_of_target(y).startswith("multilabel"): @@ -1624,6 +1622,7 @@ def _validate_input(self, X, y, incremental, reset): return X, y @available_if(lambda est: est._check_solver) + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y): """Update the model with a single iteration over the given data. @@ -1640,7 +1639,4 @@ def partial_fit(self, X, y): self : object Trained MLP model. """ - if not hasattr(self, "coefs_"): - self._validate_params() - return self._fit(X, y, incremental=True) diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index 0624145116180..2ded6533d8d96 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -17,6 +17,7 @@ from ..base import BaseEstimator from ..base import TransformerMixin from ..base import ClassNamePrefixFeaturesOutMixin +from ..base import _fit_context from ..utils import check_random_state from ..utils import gen_even_slices from ..utils.extmath import safe_sparse_dot @@ -269,6 +270,7 @@ def gibbs(self, v): return v_ + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None): """Fit the model to the partial segment of the data X. @@ -285,9 +287,6 @@ def partial_fit(self, X, y=None): self : BernoulliRBM The fitted model. 
""" - - self._validate_params() - first_pass = not hasattr(self, "components_") X = self._validate_data( X, accept_sparse="csr", dtype=np.float64, reset=first_pass @@ -380,6 +379,7 @@ def score_samples(self, X): fe_ = self._free_energy(v_) return v.shape[1] * log_logistic(fe_ - fe) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model to the data X. @@ -396,9 +396,6 @@ def fit(self, X, y=None): self : BernoulliRBM The fitted model. """ - - self._validate_params() - X = self._validate_data(X, accept_sparse="csr", dtype=(np.float64, np.float32)) n_samples = X.shape[0] rng = check_random_state(self.random_state) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 8c5dc3bd82917..43b6b7eb0c939 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -16,6 +16,7 @@ from scipy import sparse from .base import clone, TransformerMixin +from .base import _fit_context from .preprocessing import FunctionTransformer from .utils._estimator_html_repr import _VisualBlock from .utils.metaestimators import available_if @@ -385,6 +386,10 @@ def _fit(self, X, y=None, **fit_params_steps): self.steps[step_idx] = (name, fitted_transformer) return X + @_fit_context( + # estimators in Pipeline.steps are not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y=None, **fit_params): """Fit the model. @@ -411,7 +416,6 @@ def fit(self, X, y=None, **fit_params): self : object Pipeline with fitted steps. """ - self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt = self._fit(X, y, **fit_params_steps) with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)): @@ -429,6 +433,10 @@ def _can_fit_transform(self): ) @available_if(_can_fit_transform) + @_fit_context( + # estimators in Pipeline.steps are not validated yet + prefer_skip_nested_validation=False + ) def fit_transform(self, X, y=None, **fit_params): """Fit the model and transform with the final estimator. 
@@ -456,7 +464,6 @@ def fit_transform(self, X, y=None, **fit_params): Xt : ndarray of shape (n_samples, n_transformed_features) Transformed samples. """ - self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt = self._fit(X, y, **fit_params_steps) @@ -505,6 +512,10 @@ def predict(self, X, **predict_params): return self.steps[-1][1].predict(Xt, **predict_params) @available_if(_final_estimator_has("fit_predict")) + @_fit_context( + # estimators in Pipeline.steps are not validated yet + prefer_skip_nested_validation=False + ) def fit_predict(self, X, y=None, **fit_params): """Transform the data, and apply `fit_predict` with the final estimator. @@ -533,7 +544,6 @@ def fit_predict(self, X, y=None, **fit_params): y_pred : ndarray Result of calling `fit_predict` on the final estimator. """ - self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt = self._fit(X, y, **fit_params_steps) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index deaf1422705e6..6ab9002718477 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -22,6 +22,7 @@ TransformerMixin, OneToOneFeatureMixin, ClassNamePrefixFeaturesOutMixin, + _fit_context, ) from ..utils import check_array from ..utils._param_validation import Interval, Options, StrOptions, validate_params @@ -435,6 +436,7 @@ def fit(self, X, y=None): self._reset() return self.partial_fit(X, y) + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None): """Online computation of min and max on X for later scaling. @@ -456,8 +458,6 @@ def partial_fit(self, X, y=None): self : object Fitted scaler. 
""" - self._validate_params() - feature_range = self.feature_range if feature_range[0] >= feature_range[1]: raise ValueError( @@ -838,6 +838,7 @@ def fit(self, X, y=None, sample_weight=None): self._reset() return self.partial_fit(X, y, sample_weight) + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None, sample_weight=None): """Online computation of mean and std on X for later scaling. @@ -870,8 +871,6 @@ def partial_fit(self, X, y=None, sample_weight=None): self : object Fitted scaler. """ - self._validate_params() - first_call = not hasattr(self, "n_samples_seen_") X = self._validate_data( X, @@ -1183,6 +1182,7 @@ def fit(self, X, y=None): self._reset() return self.partial_fit(X, y) + @_fit_context(prefer_skip_nested_validation=True) def partial_fit(self, X, y=None): """Online computation of max absolute value of X for later scaling. @@ -1204,8 +1204,6 @@ def partial_fit(self, X, y=None): self : object Fitted scaler. """ - self._validate_params() - first_pass = not hasattr(self, "n_samples_seen_") X = self._validate_data( X, @@ -1514,6 +1512,7 @@ def __init__( self.unit_variance = unit_variance self.copy = copy + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Compute the median and quantiles to be used for scaling. @@ -1531,8 +1530,6 @@ def fit(self, X, y=None): self : object Fitted scaler. """ - self._validate_params() - # at fit, convert sparse matrices to csc for optimized computation of # the quantiles X = self._validate_data( @@ -1972,6 +1969,7 @@ def __init__(self, norm="l2", *, copy=True): self.norm = norm self.copy = copy + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Only validates estimator's parameters. @@ -1991,7 +1989,6 @@ def fit(self, X, y=None): self : object Fitted transformer. 
""" - self._validate_params() self._validate_data(X, accept_sparse="csr") return self @@ -2155,6 +2152,7 @@ def __init__(self, *, threshold=0.0, copy=True): self.threshold = threshold self.copy = copy + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Only validates estimator's parameters. @@ -2174,7 +2172,6 @@ def fit(self, X, y=None): self : object Fitted transformer. """ - self._validate_params() self._validate_data(X, accept_sparse="csr") return self @@ -2634,6 +2631,7 @@ def _sparse_fit(self, X, random_state): # https://github.com/numpy/numpy/issues/14685 self.quantiles_ = np.maximum.accumulate(self.quantiles_) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Compute the quantiles used for transforming. @@ -2653,8 +2651,6 @@ def fit(self, X, y=None): self : object Fitted transformer. """ - self._validate_params() - if self.n_quantiles > self.subsample: raise ValueError( "The number of quantiles cannot be greater than" @@ -3101,6 +3097,7 @@ def __init__(self, method="yeo-johnson", *, standardize=True, copy=True): self.standardize = standardize self.copy = copy + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Estimate the optimal parameter lambda for each feature. @@ -3120,10 +3117,10 @@ def fit(self, X, y=None): self : object Fitted transformer. """ - self._validate_params() self._fit(X, y=y, force_transform=False) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y=None): """Fit `PowerTransformer` to `X`, then transform `X`. @@ -3141,7 +3138,6 @@ def fit_transform(self, X, y=None): X_new : ndarray of shape (n_samples, n_features) Transformed data. 
""" - self._validate_params() return self._fit(X, y, force_transform=True) def _fit(self, X, y=None, force_transform=False): diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 220950586a6ef..ac7432027f462 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -11,6 +11,7 @@ from . import OneHotEncoder from ..base import BaseEstimator, TransformerMixin +from ..base import _fit_context from ..utils._param_validation import Hidden, Interval, StrOptions, Options from ..utils.validation import check_array from ..utils.validation import check_is_fitted @@ -192,6 +193,7 @@ def __init__( self.subsample = subsample self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, sample_weight=None): """ Fit the estimator. @@ -216,7 +218,6 @@ def fit(self, X, y=None, sample_weight=None): self : object Returns the instance itself. """ - self._validate_params() X = self._validate_data(X, dtype="numeric") if self.dtype in (np.float64, np.float32): diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index 1fc4b16a52467..de3f983d7ae6f 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -10,6 +10,7 @@ from scipy import sparse from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin +from ..base import _fit_context from ..utils import check_array, is_scalar_nan, _safe_indexing from ..utils.validation import check_is_fitted from ..utils.validation import _check_feature_names_in @@ -953,6 +954,7 @@ def _compute_n_features_outs(self): return output + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """ Fit OneHotEncoder to X. @@ -971,8 +973,6 @@ def fit(self, X, y=None): self Fitted encoder. 
""" - self._validate_params() - if self.sparse != "deprecated": warnings.warn( ( @@ -1446,6 +1446,7 @@ def __init__( self.min_frequency = min_frequency self.max_categories = max_categories + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """ Fit the OrdinalEncoder to X. @@ -1464,8 +1465,6 @@ def fit(self, X, y=None): self : object Fitted encoder. """ - self._validate_params() - if self.handle_unknown == "use_encoded_value": if is_scalar_nan(self.unknown_value): if np.dtype(self.dtype).kind != "f": diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index c250c5cd0226e..d7bf1810e61c0 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -3,6 +3,7 @@ import numpy as np from ..base import BaseEstimator, TransformerMixin +from ..base import _fit_context from ..utils.metaestimators import available_if from ..utils.validation import ( _allclose_dense_sparse, @@ -197,6 +198,7 @@ def _check_inverse_transform(self, X): UserWarning, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit transformer by checking X. @@ -216,7 +218,6 @@ def fit(self, X, y=None): self : object FunctionTransformer class instance. 
""" - self._validate_params() X = self._check_input(X, reset=True) if self.check_inverse and not (self.func is None or self.inverse_func is None): self._check_inverse_transform(X) diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index ca8607b06c2e2..f656329607ee3 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -16,7 +16,7 @@ import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin - +from ..base import _fit_context from ..utils.sparsefuncs import min_max_axis from ..utils._param_validation import Interval, validate_params from ..utils import column_or_1d @@ -268,6 +268,7 @@ def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False): self.pos_label = pos_label self.sparse_output = sparse_output + @_fit_context(prefer_skip_nested_validation=True) def fit(self, y): """Fit label binarizer. @@ -282,9 +283,6 @@ def fit(self, y): self : object Returns the instance itself. """ - - self._validate_params() - if self.neg_label >= self.pos_label: raise ValueError( f"neg_label={self.neg_label} must be strictly less than " @@ -761,6 +759,7 @@ def __init__(self, *, classes=None, sparse_output=False): self.classes = classes self.sparse_output = sparse_output + @_fit_context(prefer_skip_nested_validation=True) def fit(self, y): """Fit the label sets binarizer, storing :term:`classes_`. @@ -776,7 +775,6 @@ def fit(self, y): self : object Fitted estimator. """ - self._validate_params() self._cached_dict = None if self.classes is None: @@ -794,6 +792,7 @@ def fit(self, y): self.classes_[:] = classes return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, y): """Fit the label sets binarizer and transform the given label sets. 
@@ -814,7 +813,6 @@ def fit_transform(self, y): if self.classes is not None: return self.fit(y).transform(y) - self._validate_params() self._cached_dict = None # Automatically increment on new class diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index f379ee9135706..8b2978b269b9d 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -12,6 +12,7 @@ from scipy.special import comb from ..base import BaseEstimator, TransformerMixin +from ..base import _fit_context from ..utils import check_array from ..utils.fixes import sp_version, parse_version from ..utils.validation import check_is_fitted, FLOAT_DTYPES, _check_sample_weight @@ -299,6 +300,7 @@ def get_feature_names_out(self, input_features=None): feature_names.append(name) return np.asarray(feature_names, dtype=object) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """ Compute number of output features. @@ -316,7 +318,6 @@ def fit(self, X, y=None): self : object Fitted transformer. """ - self._validate_params() _, n_features = self._validate_data(X, accept_sparse=True).shape if isinstance(self.degree, Integral): @@ -802,6 +803,7 @@ def get_feature_names_out(self, input_features=None): feature_names.append(f"{input_features[i]}_sp_{j}") return np.asarray(feature_names, dtype=object) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, sample_weight=None): """Compute knot positions of splines. @@ -823,8 +825,6 @@ def fit(self, X, y=None, sample_weight=None): self : object Fitted transformer. 
""" - self._validate_params() - X = self._validate_data( X, reset=True, diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py index 9100d72194a32..9dd33ddfa3cce 100644 --- a/sklearn/preprocessing/_target_encoder.py +++ b/sklearn/preprocessing/_target_encoder.py @@ -4,6 +4,7 @@ from ._encoders import _BaseEncoder from ..base import OneToOneFeatureMixin +from ..base import _fit_context from ._target_encoder_fast import _fit_encoding_fast from ._target_encoder_fast import _fit_encoding_fast_auto_smooth from ..utils.validation import _check_y, check_consistent_length @@ -176,6 +177,7 @@ def __init__( self.shuffle = shuffle self.random_state = random_state + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the :class:`TargetEncoder` to X and y. @@ -192,10 +194,10 @@ def fit(self, X, y): self : object Fitted encoder. """ - self._validate_params() self._fit_encodings_all(X, y) return self + @_fit_context(prefer_skip_nested_validation=True) def fit_transform(self, X, y): """Fit :class:`TargetEncoder` and transform X with the target encoding. 
@@ -219,7 +221,6 @@ def fit_transform(self, X, y): """ from ..model_selection import KFold, StratifiedKFold # avoid circular import - self._validate_params() X_ordinal, X_known_mask, y, n_categories = self._fit_encodings_all(X, y) # The cv splitter is voluntarily restricted to *KFold to enforce non diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 9e9620e089521..ca0ee41784ab5 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -36,7 +36,7 @@ from .base import BaseEstimator, TransformerMixin from .base import ClassNamePrefixFeaturesOutMixin - +from .base import _fit_context from .utils import check_random_state from .utils._param_validation import Interval, StrOptions, validate_params from .utils.extmath import safe_sparse_dot @@ -356,6 +356,7 @@ def _compute_inverse_components(self): components = components.toarray() return linalg.pinv(components, check_finite=False) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Generate a sparse random projection matrix. @@ -374,7 +375,6 @@ def fit(self, X, y=None): self : object BaseRandomProjection class instance. """ - self._validate_params() X = self._validate_data( X, accept_sparse=["csr", "csc"], dtype=[np.float64, np.float32] ) diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 95fad0713d558..9d7786bc1d67e 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -64,6 +64,7 @@ from scipy.sparse import csgraph from ..base import BaseEstimator, ClassifierMixin +from ..base import _fit_context from ..metrics.pairwise import rbf_kernel from ..neighbors import NearestNeighbors from ..utils.extmath import safe_sparse_dot @@ -230,6 +231,7 @@ class labels. 
probabilities /= normalizer return probabilities + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit a semi-supervised label propagation model to X. @@ -254,7 +256,6 @@ def fit(self, X, y): self : object Returns the instance itself. """ - self._validate_params() X, y = self._validate_data( X, y, diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index 2438658ed89c8..c4706df1754da 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -4,6 +4,7 @@ import numpy as np from ..base import MetaEstimatorMixin, clone, BaseEstimator +from ..base import _fit_context from ..utils._param_validation import HasMethods, Interval, StrOptions from ..utils.validation import check_is_fitted from ..utils.metaestimators import available_if @@ -171,6 +172,10 @@ def __init__( self.max_iter = max_iter self.verbose = verbose + @_fit_context( + # SelfTrainingClassifier.base_estimator is not validated yet + prefer_skip_nested_validation=False + ) def fit(self, X, y): """ Fit self-training classifier using `X`, `y` as training data. @@ -189,8 +194,6 @@ def fit(self, X, y): self : object Fitted estimator. """ - self._validate_params() - # we need row slicing support for sparce matrices, but costly finiteness check # can be delegated to the base estimator. X, y = self._validate_data( diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index 55919099e027c..a54c31cecb6e1 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -11,6 +11,7 @@ from . import _liblinear as liblinear # type: ignore from . import _libsvm_sparse as libsvm_sparse # type: ignore from ..base import BaseEstimator, ClassifierMixin +from ..base import _fit_context from ..preprocessing import LabelEncoder from ..utils.multiclass import _ovr_decision_function from ..utils import check_array, check_random_state @@ -143,6 +144,7 @@ def _more_tags(self): # Used by cross_val_score. 
return {"pairwise": self.kernel == "precomputed"} + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the SVM model according to the given training data. @@ -176,8 +178,6 @@ def fit(self, X, y, sample_weight=None): If X is a dense array, then the other methods will not support sparse matrices as input. """ - self._validate_params() - rnd = check_random_state(self.random_state) sparse = sp.isspmatrix(X) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index e035e74a05e2c..a438d007da970 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -5,6 +5,7 @@ from ._base import _fit_liblinear, _get_liblinear_solver_type, BaseSVC, BaseLibSVM from ..base import BaseEstimator, RegressorMixin, OutlierMixin +from ..base import _fit_context from ..linear_model._base import LinearClassifierMixin, SparseCoefMixin, LinearModel from ..utils import deprecated from ..utils.validation import _num_samples @@ -272,6 +273,7 @@ def __init__( self.penalty = penalty self.loss = loss + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the model according to the given training data. @@ -296,8 +298,6 @@ def fit(self, X, y, sample_weight=None): self : object An instance of the estimator. """ - self._validate_params() - X, y = self._validate_data( X, y, @@ -529,6 +529,7 @@ def __init__( self.dual = dual self.loss = loss + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None): """Fit the model according to the given training data. @@ -553,8 +554,6 @@ def fit(self, X, y, sample_weight=None): self : object An instance of the estimator. 
""" - self._validate_params() - X, y = self._validate_data( X, y, diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index e4a3b0a9ee3af..d8e578424d461 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -31,6 +31,7 @@ from ..base import RegressorMixin from ..base import is_classifier from ..base import MultiOutputMixin +from ..base import _fit_context from ..utils import Bunch from ..utils import check_random_state from ..utils.validation import _check_sample_weight @@ -219,7 +220,6 @@ def _compute_feature_has_missing(self, X): def _fit( self, X, y, sample_weight=None, check_input=True, feature_has_missing=None ): - self._validate_params() random_state = check_random_state(self.random_state) if check_input: @@ -917,6 +917,7 @@ def __init__( ccp_alpha=ccp_alpha, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None, check_input=True): """Build a decision tree classifier from the training set (X, y). @@ -1278,6 +1279,7 @@ def __init__( ccp_alpha=ccp_alpha, ) + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None, check_input=True): """Build a decision tree regressor from the training set (X, y). 
diff --git a/sklearn/utils/tests/test_param_validation.py b/sklearn/utils/tests/test_param_validation.py index 528a667a3f58e..022f9f373a049 100644 --- a/sklearn/utils/tests/test_param_validation.py +++ b/sklearn/utils/tests/test_param_validation.py @@ -6,6 +6,7 @@ from sklearn._config import config_context, get_config from sklearn.base import BaseEstimator +from sklearn.base import _fit_context from sklearn.model_selection import LeaveOneOut from sklearn.utils import deprecated from sklearn.utils._param_validation import Hidden @@ -60,8 +61,9 @@ class _Estimator(BaseEstimator): def __init__(self, a): self.a = a + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X=None, y=None): - self._validate_params() + pass @pytest.mark.parametrize("interval_type", [Integral, Real]) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 4a765d1404794..2d39279f81745 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -42,6 +42,7 @@ from sklearn.utils import _safe_indexing from sklearn.utils.validation import ( has_fit_parameter, + _is_fitted, check_is_fitted, check_consistent_length, assert_all_finite, @@ -848,23 +849,32 @@ def fit(self, X, y): msg = "not fitted" est = MyEstimator() + assert not _is_fitted(est, attributes=["a_", "b_"]) with pytest.raises(NotFittedError, match=msg): check_is_fitted(est, attributes=["a_", "b_"]) + assert not _is_fitted(est, attributes=["a_", "b_"], all_or_any=all) with pytest.raises(NotFittedError, match=msg): check_is_fitted(est, attributes=["a_", "b_"], all_or_any=all) + assert not _is_fitted(est, attributes=["a_", "b_"], all_or_any=any) with pytest.raises(NotFittedError, match=msg): check_is_fitted(est, attributes=["a_", "b_"], all_or_any=any) est.a_ = "a" + assert not _is_fitted(est, attributes=["a_", "b_"]) with pytest.raises(NotFittedError, match=msg): check_is_fitted(est, attributes=["a_", "b_"]) + assert not _is_fitted(est, 
attributes=["a_", "b_"], all_or_any=all) with pytest.raises(NotFittedError, match=msg): check_is_fitted(est, attributes=["a_", "b_"], all_or_any=all) + assert _is_fitted(est, attributes=["a_", "b_"], all_or_any=any) check_is_fitted(est, attributes=["a_", "b_"], all_or_any=any) est.b_ = "b" + assert _is_fitted(est, attributes=["a_", "b_"]) check_is_fitted(est, attributes=["a_", "b_"]) + assert _is_fitted(est, attributes=["a_", "b_"], all_or_any=all) check_is_fitted(est, attributes=["a_", "b_"], all_or_any=all) + assert _is_fitted(est, attributes=["a_", "b_"], all_or_any=any) check_is_fitted(est, attributes=["a_", "b_"], all_or_any=any) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 6179d91c2a491..8ceef15986567 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1369,6 +1369,44 @@ def check_symmetric(array, *, tol=1e-10, raise_warning=True, raise_exception=Fal return array +def _is_fitted(estimator, attributes=None, all_or_any=all): + """Determine if an estimator is fitted. + + Parameters + ---------- + estimator : estimator instance + Estimator instance for which the check is performed. + + attributes : str, list or tuple of str, default=None + Attribute name(s) given as string or a list/tuple of strings + E.g.: ``["coef_", "estimator_", ...], "coef_"`` + + If `None`, `estimator` is considered fitted if there exists an + attribute that ends with an underscore and does not start with a double + underscore. + + all_or_any : callable, {all, any}, default=all + Specify whether all or any of the given attributes must exist. + + Returns + ------- + fitted : bool + Whether the estimator is fitted.
+ """ + if attributes is not None: + if not isinstance(attributes, (list, tuple)): + attributes = [attributes] + return all_or_any([hasattr(estimator, attr) for attr in attributes]) + + if hasattr(estimator, "__sklearn_is_fitted__"): + return estimator.__sklearn_is_fitted__() + + fitted_attrs = [ + v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") + ] + return len(fitted_attrs) > 0 + + def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): """Perform is_fitted validation for estimator. @@ -1425,18 +1463,7 @@ def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all): if not hasattr(estimator, "fit"): raise TypeError("%s is not an estimator instance." % (estimator)) - if attributes is not None: - if not isinstance(attributes, (list, tuple)): - attributes = [attributes] - fitted = all_or_any([hasattr(estimator, attr) for attr in attributes]) - elif hasattr(estimator, "__sklearn_is_fitted__"): - fitted = estimator.__sklearn_is_fitted__() - else: - fitted = [ - v for v in vars(estimator) if v.endswith("_") and not v.startswith("__") - ] - - if not fitted: + if not _is_fitted(estimator, attributes, all_or_any): raise NotFittedError(msg % {"name": type(estimator).__name__})