diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 70310366d0fac..d4c97be86e755 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -450,6 +450,12 @@ API changes summary :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_. + - The ``iid`` parameter of :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` has been deprecated and will + be removed in version 0.21. Future behavior will be the current default + behavior (equivalent to ``iid=True``). + :issue:`#9085` by :user:`Laurent Direr`. + - Gradient boosting base models are no longer estimators. By `Andreas Müller`_. - :class:`feature_selection.SelectFromModel` now validates the ``threshold`` diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 67bd8597de0d4..4c8ba9a9b6b97 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -379,7 +379,7 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, @abstractmethod def __init__(self, estimator, scoring=None, - fit_params=None, n_jobs=1, iid=True, + fit_params=None, n_jobs=1, iid=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', error_score='raise', return_train_score=True): @@ -395,6 +395,11 @@ def __init__(self, estimator, scoring=None, self.error_score = error_score self.return_train_score = return_train_score + if self.iid is not None: + warnings.warn("The `iid` parameter has been deprecated " + "in version 0.19 and will be removed in 0.21.", + DeprecationWarning) + @property def _estimator_type(self): return self.estimator._estimator_type @@ -640,7 +645,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False): dtype=np.int) _store('test_score', test_scores, splits=True, rank=True, - weights=test_sample_counts if self.iid else None) + weights=test_sample_counts if (self.iid or self.iid is None) + else None) if self.return_train_score: _store('train_score', train_scores, splits=True) _store('fit_time', fit_time) @@ -781,11 +787,16 @@ class GridSearchCV(BaseSearchCV): - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' - iid : boolean, default=True + iid : boolean, default=None If True, the data is assumed to be identically distributed across the folds, and the loss minimized is the total loss per sample, and not the mean loss across the folds. + ..deprecated:: 0.19 + Parameter ``iid`` has been deprecated in version 0.19 and + will be removed in 0.21. + Future (and default) behavior is equivalent to `iid=true`. + cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -954,7 +965,7 @@ class GridSearchCV(BaseSearchCV): """ def __init__(self, estimator, param_grid, scoring=None, fit_params=None, - n_jobs=1, iid=True, refit=True, cv=None, verbose=0, + n_jobs=1, iid=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', error_score='raise', return_train_score=True): super(GridSearchCV, self).__init__( @@ -1046,11 +1057,16 @@ class RandomizedSearchCV(BaseSearchCV): - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' - iid : boolean, default=True + iid : boolean, default=None If True, the data is assumed to be identically distributed across the folds, and the loss minimized is the total loss per sample, and not the mean loss across the folds. + ..deprecated:: 0.19 + Parameter ``iid`` has been deprecated in version 0.19 and + will be removed in 0.21. + Future (and default) behavior is equivalent to `iid=true`. + cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1189,7 +1205,7 @@ class RandomizedSearchCV(BaseSearchCV): """ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, - fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, + fit_params=None, n_jobs=1, iid=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True): self.param_distributions = param_distributions diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 1d6cf50ec1c33..0100c3d686bbc 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -834,6 +834,7 @@ def test_random_search_cv_results(): check_cv_results_grid_scores_consistency(search) +@ignore_warnings(category=DeprecationWarning) def test_search_iid_param(): # Test the IID parameter # noise-free simple 2d-data @@ -855,7 +856,7 @@ def test_search_iid_param(): cv=cv) for search in (grid_search, random_search): search.fit(X, y) - assert_true(search.iid) + assert_true(search.iid or search.iid is None) test_cv_scores = np.array(list(search.cv_results_['split%d_test_score' % s_i][0] @@ -1317,3 +1318,10 @@ def test_transform_inverse_transform_round_trip(): grid_search.fit(X, y) X_round_trip = grid_search.inverse_transform(grid_search.transform(X)) assert_array_equal(X, X_round_trip) + + +def test_deprecated_grid_search_idd(): + depr_message = ("The `iid` parameter has been deprecated in version 0.19 " + "and will be removed in 0.21.") + assert_warns_message(DeprecationWarning, depr_message, GridSearchCV, + SVC(), [], iid=False)