From d0fdb4e859e3116976eec4a68c59d145301ccbff Mon Sep 17 00:00:00 2001 From: Laurent Direr Date: Sat, 10 Jun 2017 17:11:48 +0200 Subject: [PATCH 1/3] Add deprecation warning for iid in BaseSearchCV --- sklearn/grid_search.py | 5 +++++ sklearn/model_selection/tests/test_search.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 76cdaa7cb1de5..b1644089ebe65 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -398,6 +398,11 @@ def __init__(self, estimator, scoring=None, self.pre_dispatch = pre_dispatch self.error_score = error_score + if not self.iid: + warnings.warn("The `iid` parameter has been deprecated " + "in version 0.19 and will be removed in 0.21.", + DeprecationWarning) + @property def _estimator_type(self): return self.estimator._estimator_type diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 1d6cf50ec1c33..e4af366c6a61e 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -1317,3 +1317,11 @@ def test_transform_inverse_transform_round_trip(): grid_search.fit(X, y) X_round_trip = grid_search.inverse_transform(grid_search.transform(X)) assert_array_equal(X, X_round_trip) + + +@ignore_warnings(category=DeprecationWarning) +def test_deprecated_grid_search_idd(): + depr_message = ("The `iid` parameter has been deprecated in version 0.19 " + "and will be removed in 0.21.") + assert_warns_message(DeprecationWarning, depr_message, GridSearchCV, + SVC(), [], iid=False) From be313679c5cdfc691d1751b645fe9fa18b53e8b8 Mon Sep 17 00:00:00 2001 From: Laurent Direr Date: Sat, 17 Jun 2017 16:30:13 +0200 Subject: [PATCH 2/3] Revert changes on deprecated class and add deprecation to refactored model_selection module --- sklearn/grid_search.py | 5 ---- sklearn/model_selection/_search.py | 28 +++++++++++++++----- sklearn/model_selection/tests/test_search.py | 4 +-- 3 files 
changed, 24 insertions(+), 13 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index b1644089ebe65..76cdaa7cb1de5 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -398,11 +398,6 @@ def __init__(self, estimator, scoring=None, self.pre_dispatch = pre_dispatch self.error_score = error_score - if not self.iid: - warnings.warn("The `iid` parameter has been deprecated " - "in version 0.19 and will be removed in 0.21.", - DeprecationWarning) - @property def _estimator_type(self): return self.estimator._estimator_type diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 67bd8597de0d4..4c8ba9a9b6b97 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -379,7 +379,7 @@ class BaseSearchCV(six.with_metaclass(ABCMeta, BaseEstimator, @abstractmethod def __init__(self, estimator, scoring=None, - fit_params=None, n_jobs=1, iid=True, + fit_params=None, n_jobs=1, iid=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', error_score='raise', return_train_score=True): @@ -395,6 +395,11 @@ def __init__(self, estimator, scoring=None, self.error_score = error_score self.return_train_score = return_train_score + if self.iid is not None: + warnings.warn("The `iid` parameter has been deprecated " + "in version 0.19 and will be removed in 0.21.", + DeprecationWarning) + @property def _estimator_type(self): return self.estimator._estimator_type @@ -640,7 +645,8 @@ def _store(key_name, array, weights=None, splits=False, rank=False): dtype=np.int) _store('test_score', test_scores, splits=True, rank=True, - weights=test_sample_counts if self.iid else None) + weights=test_sample_counts if (self.iid or self.iid is None) + else None) if self.return_train_score: _store('train_score', train_scores, splits=True) _store('fit_time', fit_time) @@ -781,11 +787,16 @@ class GridSearchCV(BaseSearchCV): - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' - 
iid : boolean, default=True + iid : boolean, default=None If True, the data is assumed to be identically distributed across the folds, and the loss minimized is the total loss per sample, and not the mean loss across the folds. + .. deprecated:: 0.19 + Parameter ``iid`` has been deprecated in version 0.19 and + will be removed in 0.21. + Future (and default) behavior is equivalent to ``iid=True``. + cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -954,7 +965,7 @@ class GridSearchCV(BaseSearchCV): """ def __init__(self, estimator, param_grid, scoring=None, fit_params=None, - n_jobs=1, iid=True, refit=True, cv=None, verbose=0, + n_jobs=1, iid=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', error_score='raise', return_train_score=True): super(GridSearchCV, self).__init__( @@ -1046,11 +1057,16 @@ class RandomizedSearchCV(BaseSearchCV): - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' - iid : boolean, default=True + iid : boolean, default=None If True, the data is assumed to be identically distributed across the folds, and the loss minimized is the total loss per sample, and not the mean loss across the folds. + .. deprecated:: 0.19 + Parameter ``iid`` has been deprecated in version 0.19 and + will be removed in 0.21. + Future (and default) behavior is equivalent to ``iid=True``. + cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. 
Possible inputs for cv are: @@ -1189,7 +1205,7 @@ class RandomizedSearchCV(BaseSearchCV): """ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, - fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, + fit_params=None, n_jobs=1, iid=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True): self.param_distributions = param_distributions diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index e4af366c6a61e..0100c3d686bbc 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -834,6 +834,7 @@ def test_random_search_cv_results(): check_cv_results_grid_scores_consistency(search) +@ignore_warnings(category=DeprecationWarning) def test_search_iid_param(): # Test the IID parameter # noise-free simple 2d-data @@ -855,7 +856,7 @@ def test_search_iid_param(): cv=cv) for search in (grid_search, random_search): search.fit(X, y) - assert_true(search.iid) + assert_true(search.iid or search.iid is None) test_cv_scores = np.array(list(search.cv_results_['split%d_test_score' % s_i][0] @@ -1319,7 +1320,6 @@ def test_transform_inverse_transform_round_trip(): assert_array_equal(X, X_round_trip) -@ignore_warnings(category=DeprecationWarning) def test_deprecated_grid_search_idd(): depr_message = ("The `iid` parameter has been deprecated in version 0.19 " "and will be removed in 0.21.") From 7ad4b06cca5891bdaa2fddb0b0039075947a0ab3 Mon Sep 17 00:00:00 2001 From: Laurent Direr Date: Sat, 17 Jun 2017 16:49:01 +0200 Subject: [PATCH 3/3] Adding deprecation to changelog --- doc/whats_new.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 70310366d0fac..d4c97be86e755 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -450,6 +450,12 @@ API changes summary :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform 
to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_. + - The ``iid`` parameter of :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` has been deprecated and will + be removed in version 0.21. Future behavior will be the current default + behavior (equivalent to ``iid=True``). + :issue:`9085` by :user:`Laurent Direr`. + - Gradient boosting base models are no longer estimators. By `Andreas Müller`_. - :class:`feature_selection.SelectFromModel` now validates the ``threshold``