From cd948b6d8cdb94d16e79406a36771a6c6dfc6ceb Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Sun, 15 Jul 2018 16:50:44 -0500 Subject: [PATCH 01/14] Added deprecation warning for n_estimators default value and created test --- sklearn/ensemble/forest.py | 44 +++++++++++++++++++++------ sklearn/ensemble/tests/test_forest.py | 22 ++++++++++++++ 2 files changed, 57 insertions(+), 9 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index b7a349d4b5a89..f080e8611716e 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -135,7 +135,7 @@ class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble)): @abstractmethod def __init__(self, base_estimator, - n_estimators=10, + n_estimators='warn', estimator_params=tuple(), bootstrap=False, oob_score=False, @@ -242,6 +242,12 @@ def fit(self, X, y, sample_weight=None): ------- self : object """ + + if self.n_estimators == 'warn': + warnings.warn("The default value of n_estimators will change from " + "10 in version 0.20 to 100 in 0.22", FutureWarning) + self.n_estimators = 10 + # Validate or convert input data X = check_array(X, accept_sparse="csc", dtype=DTYPE) y = check_array(y, accept_sparse='csc', ensure_2d=False, dtype=None) @@ -399,7 +405,7 @@ class ForestClassifier(six.with_metaclass(ABCMeta, BaseForest, @abstractmethod def __init__(self, base_estimator, - n_estimators=10, + n_estimators='warn', estimator_params=tuple(), bootstrap=False, oob_score=False, @@ -408,7 +414,7 @@ def __init__(self, verbose=0, warm_start=False, class_weight=None): - + super(ForestClassifier, self).__init__( base_estimator, n_estimators=n_estimators, @@ -638,7 +644,7 @@ class ForestRegressor(six.with_metaclass(ABCMeta, BaseForest, RegressorMixin)): @abstractmethod def __init__(self, base_estimator, - n_estimators=10, + n_estimators='warn', estimator_params=tuple(), bootstrap=False, oob_score=False, @@ -758,6 +764,10 @@ class RandomForestClassifier(ForestClassifier): n_estimators : integer, optional (default=10) The number of trees in the forest. + .. deprecated:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. + criterion : string, optional (default="gini") The function to measure the quality of a split. Supported criteria are "gini" for the Gini impurity and "entropy" for the information gain. @@ -971,7 +981,7 @@ class labels (multi-output problem). DecisionTreeClassifier, ExtraTreesClassifier """ def __init__(self, - n_estimators=10, + n_estimators='warn', criterion="gini", max_depth=None, min_samples_split=2, @@ -1032,6 +1042,10 @@ class RandomForestRegressor(ForestRegressor): n_estimators : integer, optional (default=10) The number of trees in the forest. + .. deprecated:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. + criterion : string, optional (default="mse") The function to measure the quality of a split. Supported criteria are "mse" for the mean squared error, which is equal to variance @@ -1211,7 +1225,7 @@ class RandomForestRegressor(ForestRegressor): DecisionTreeRegressor, ExtraTreesRegressor """ def __init__(self, - n_estimators=10, + n_estimators='warn', criterion="mse", max_depth=None, min_samples_split=2, @@ -1268,6 +1282,10 @@ class ExtraTreesClassifier(ForestClassifier): n_estimators : integer, optional (default=10) The number of trees in the forest. + .. deprecated:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. + criterion : string, optional (default="gini") The function to measure the quality of a split. Supported criteria are "gini" for the Gini impurity and "entropy" for the information gain. @@ -1454,7 +1472,7 @@ class labels (multi-output problem). splits. """ def __init__(self, - n_estimators=10, + n_estimators='warn', criterion="gini", max_depth=None, min_samples_split=2, @@ -1513,6 +1531,10 @@ class ExtraTreesRegressor(ForestRegressor): n_estimators : integer, optional (default=10) The number of trees in the forest. + .. deprecated:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. + criterion : string, optional (default="mse") The function to measure the quality of a split. Supported criteria are "mse" for the mean squared error, which is equal to variance @@ -1666,7 +1688,7 @@ class ExtraTreesRegressor(ForestRegressor): RandomForestRegressor: Ensemble regressor using trees with optimal splits. """ def __init__(self, - n_estimators=10, + n_estimators='warn', criterion="mse", max_depth=None, min_samples_split=2, @@ -1728,6 +1750,10 @@ class RandomTreesEmbedding(BaseForest): n_estimators : integer, optional (default=10) Number of trees in the forest. + .. deprecated:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. + max_depth : integer, optional (default=5) The maximum depth of each tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -1833,7 +1859,7 @@ class RandomTreesEmbedding(BaseForest): """ def __init__(self, - n_estimators=10, + n_estimators='warn', max_depth=5, min_samples_split=2, min_samples_leaf=1, diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 0054707ba5a0d..d2a73abe76c05 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -31,6 +31,7 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message +from sklearn.utils.testing import assert_no_warnings from sklearn.utils.testing import ignore_warnings from sklearn import datasets @@ -1228,3 +1229,24 @@ def test_min_impurity_decrease(): # Simply check if the parameter is passed on correctly. Tree tests # will suffice for the actual working of this param assert_equal(tree.min_impurity_decrease, 0.1) + +def test_nestimators_future_warning(): + # Test that n_estimators future warning is raised. Will be removed in 0.22 + + all_estimators = [RandomForestClassifier, RandomForestRegressor, + ExtraTreesClassifier, ExtraTreesRegressor, + RandomTreesEmbedding] + + # When n_estimators default value is used + msg_future = "The default value of n_estimators will change " + "from 10 to 100 in 0.22" + + for Estimator in all_estimators: + est = Estimator() + est = assert_warns_message(FutureWarning, msg_future, est.fit, X, y) + + # When n_estimators is a valid value not equal to the default + est = Estimator(n_estimators = 100) + est = assert_no_warnings(est.fit, X, y) + + From a3738cbb832a8149704250f0c22b2e1090871059 Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Sun, 15 Jul 2018 16:59:50 -0500 Subject: [PATCH 02/14] Changed msg_future string --- sklearn/ensemble/tests/test_forest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index d2a73abe76c05..e54f7a2c28c75 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -1238,8 +1238,8 @@ def test_nestimators_future_warning(): RandomTreesEmbedding] # When n_estimators default value is used - msg_future = "The default value of n_estimators will change " - "from 10 to 100 in 0.22" + msg_future = "The default value of n_estimators will change from " + "10 in version 0.20 to 100 in 0.22." for Estimator in all_estimators: est = Estimator() From e991bc1eed20e135d03f182a79239879d271f72d Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Sun, 15 Jul 2018 17:12:04 -0500 Subject: [PATCH 03/14] Added period to n_estimators warning message --- sklearn/ensemble/forest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index f080e8611716e..3922924a4f419 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -245,7 +245,7 @@ def fit(self, X, y, sample_weight=None): if self.n_estimators == 'warn': warnings.warn("The default value of n_estimators will change from " - "10 in version 0.20 to 100 in 0.22", FutureWarning) + "10 in version 0.20 to 100 in 0.22.", FutureWarning) self.n_estimators = 10 # Validate or convert input data From ef33157e0c11c00fd1a87e86af849bd24f1a50b8 Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Sun, 15 Jul 2018 17:12:33 -0500 Subject: [PATCH 04/14] Fixed linting issues pertaining to code I added --- sklearn/ensemble/tests/test_forest.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index e54f7a2c28c75..70449f509a6a4 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -1230,9 +1230,10 @@ def test_min_impurity_decrease(): # will suffice for the actual working of this param assert_equal(tree.min_impurity_decrease, 0.1) + def test_nestimators_future_warning(): # Test that n_estimators future warning is raised. Will be removed in 0.22 - + all_estimators = [RandomForestClassifier, RandomForestRegressor, ExtraTreesClassifier, ExtraTreesRegressor, RandomTreesEmbedding] @@ -1246,7 +1247,5 @@ def test_nestimators_future_warning(): est = assert_warns_message(FutureWarning, msg_future, est.fit, X, y) # When n_estimators is a valid value not equal to the default - est = Estimator(n_estimators = 100) + est = Estimator(n_estimators=100) est = assert_no_warnings(est.fit, X, y) - - From 76c80928261b057f353f392559d1941b38d6e70e Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Sun, 15 Jul 2018 17:19:05 -0500 Subject: [PATCH 05/14] Removed blank line --- sklearn/ensemble/forest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 3922924a4f419..92552cef6b563 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -414,7 +414,6 @@ def __init__(self, verbose=0, warm_start=False, class_weight=None): - super(ForestClassifier, self).__init__( base_estimator, n_estimators=n_estimators, From be10d8da16e3e78bdbe4e211635b17235d980e83 Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Tue, 17 Jul 2018 00:44:58 -0500 Subject: [PATCH 06/14] Added entry for change in default of n_estimators parameter --- doc/whats_new/v0.20.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 0df0635d57c75..5743b099fd5e1 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -17,7 +17,6 @@ Highlights We have tried to improve our support for common data-science use-cases including missing values, categorical variables, heterogeneous data, and features/targets with unusual distributions. - Missing values in features, represented by NaNs, are now accepted in column-wise preprocessing such as scalers. Each feature is fitted disregarding NaNs, and data containing NaNs can be transformed. The new :mod:`impute` @@ -690,6 +689,15 @@ Datasets API changes summary ------------------- +Classifiers and regressors + +- The default value of the ``n_estimators`` parameter of + :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, + :class:`ensemble.ExtraTreesClassifier`, :class:`ensemble.ExtraTreesRegressor`, + and :class:`ensemble.RandomTreesEmbedding` will change from 10 in version 0.20 + to 100 in 0.22. A FutureWarning is raised when the default value is used. + :issue:`11542` by :user:`Anna Ayzenshtat `. + Linear, kernelized and related models - Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the From fbdf66c56125be8e907183ad249b28fd4a385368 Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Tue, 17 Jul 2018 00:45:48 -0500 Subject: [PATCH 07/14] Changed deprecated to versionchanged --- sklearn/ensemble/forest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 92552cef6b563..9cb40e55d882a 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -763,7 +763,7 @@ class RandomForestClassifier(ForestClassifier): n_estimators : integer, optional (default=10) The number of trees in the forest. - .. deprecated:: 0.20 + .. versionchanged:: 0.20 The default value of ``n_estimators`` will change from 10 in version 0.20 to 100 in version 0.22. @@ -1041,7 +1041,7 @@ class RandomForestRegressor(ForestRegressor): n_estimators : integer, optional (default=10) The number of trees in the forest. - .. deprecated:: 0.20 + .. versionchanged:: 0.20 The default value of ``n_estimators`` will change from 10 in version 0.20 to 100 in version 0.22. @@ -1281,7 +1281,7 @@ class ExtraTreesClassifier(ForestClassifier): n_estimators : integer, optional (default=10) The number of trees in the forest. - .. deprecated:: 0.20 + .. versionchanged:: 0.20 The default value of ``n_estimators`` will change from 10 in version 0.20 to 100 in version 0.22. @@ -1530,7 +1530,7 @@ class ExtraTreesRegressor(ForestRegressor): n_estimators : integer, optional (default=10) The number of trees in the forest. - .. deprecated:: 0.20 + .. versionchanged:: 0.20 The default value of ``n_estimators`` will change from 10 in version 0.20 to 100 in version 0.22. @@ -1749,7 +1749,7 @@ class RandomTreesEmbedding(BaseForest): n_estimators : integer, optional (default=10) Number of trees in the forest. - .. deprecated:: 0.20 + .. versionchanged:: 0.20 The default value of ``n_estimators`` will change from 10 in version 0.20 to 100 in version 0.22. From 2aa3b7f0f2250be32ecc110499dfcc2afa3c1f25 Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Tue, 17 Jul 2018 00:46:49 -0500 Subject: [PATCH 08/14] Changed loop to pytest.mark.parametrize --- sklearn/ensemble/tests/test_forest.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 70449f509a6a4..0e4f8806e58e8 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -1231,21 +1231,19 @@ def test_min_impurity_decrease(): assert_equal(tree.min_impurity_decrease, 0.1) -def test_nestimators_future_warning(): - # Test that n_estimators future warning is raised. Will be removed in 0.22 - - all_estimators = [RandomForestClassifier, RandomForestRegressor, - ExtraTreesClassifier, ExtraTreesRegressor, - RandomTreesEmbedding] +@pytest.mark.parametrize('forest', + [RandomForestClassifier, RandomForestRegressor, + ExtraTreesClassifier, ExtraTreesRegressor, + RandomTreesEmbedding]) +def test_nestimators_future_warning(forest): + # FIXME: to be removed 0.22 # When n_estimators default value is used msg_future = "The default value of n_estimators will change from " "10 in version 0.20 to 100 in 0.22." + est = forest() + est = assert_warns_message(FutureWarning, msg_future, est.fit, X, y) - for Estimator in all_estimators: - est = Estimator() - est = assert_warns_message(FutureWarning, msg_future, est.fit, X, y) - - # When n_estimators is a valid value not equal to the default - est = Estimator(n_estimators=100) - est = assert_no_warnings(est.fit, X, y) + # When n_estimators is a valid value not equal to the default + est = forest(n_estimators=100) + est = assert_no_warnings(est.fit, X, y) From cec4dd70ae3b84ffd97e6584043e112eeb99900f Mon Sep 17 00:00:00 2001 From: Anna Ayzenshtat Date: Tue, 17 Jul 2018 01:18:55 -0500 Subject: [PATCH 09/14] Added pytest.mark.filterwarnings to filter n_estimators warning --- sklearn/ensemble/tests/test_forest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 0e4f8806e58e8..4ab818959cb08 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -1235,6 +1235,7 @@ def test_min_impurity_decrease(): [RandomForestClassifier, RandomForestRegressor, ExtraTreesClassifier, ExtraTreesRegressor, RandomTreesEmbedding]) +@pytest.mark.filterwarnings('ignore:default value of n_estimators') def test_nestimators_future_warning(forest): # FIXME: to be removed 0.22 From 2cf678e19f72d0c531cb6fcacefb48c5607ff0c8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 17 Jul 2018 17:32:36 +0200 Subject: [PATCH 10/14] TST add filter warnings in the ensemble module --- setup.cfg | 2 +- sklearn/ensemble/forest.py | 34 +++++++++---------- sklearn/ensemble/tests/test_forest.py | 20 +++++++++-- .../ensemble/tests/test_voting_classifier.py | 14 ++++++++ .../ensemble/tests/test_weight_boosting.py | 3 ++ sklearn/utils/estimator_checks.py | 8 ++++- 6 files changed, 59 insertions(+), 22 deletions(-) diff --git a/setup.cfg b/setup.cfg index b02383bae3b55..125afe1f535a0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ test = pytest # using yield with parametrize addopts = --doctest-modules - --disable-pytest-warnings + # --disable-pytest-warnings -rs [wheelhouse_uploader] diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 9cb40e55d882a..1345e599a8dcc 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -135,7 +135,7 @@ class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble)): @abstractmethod def __init__(self, base_estimator, - n_estimators='warn', + n_estimators=100, estimator_params=tuple(), bootstrap=False, oob_score=False, @@ -405,7 +405,7 @@ class ForestClassifier(six.with_metaclass(ABCMeta, BaseForest, @abstractmethod def __init__(self, base_estimator, - n_estimators='warn', + n_estimators=100, estimator_params=tuple(), bootstrap=False, oob_score=False, @@ -643,7 +643,7 @@ class ForestRegressor(six.with_metaclass(ABCMeta, BaseForest, RegressorMixin)): @abstractmethod def __init__(self, base_estimator, - n_estimators='warn', + n_estimators=100, estimator_params=tuple(), bootstrap=False, oob_score=False, @@ -764,8 +764,8 @@ class RandomForestClassifier(ForestClassifier): The number of trees in the forest. .. versionchanged:: 0.20 - The default value of ``n_estimators`` will change from 10 in - version 0.20 to 100 in version 0.22. + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. criterion : string, optional (default="gini") The function to measure the quality of a split. Supported criteria are @@ -1041,9 +1041,9 @@ class RandomForestRegressor(ForestRegressor): n_estimators : integer, optional (default=10) The number of trees in the forest. - .. versionchanged:: 0.20 - The default value of ``n_estimators`` will change from 10 in - version 0.20 to 100 in version 0.22. + .. versionchanged:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. criterion : string, optional (default="mse") The function to measure the quality of a split. Supported criteria @@ -1281,9 +1281,9 @@ class ExtraTreesClassifier(ForestClassifier): n_estimators : integer, optional (default=10) The number of trees in the forest. - .. versionchanged:: 0.20 - The default value of ``n_estimators`` will change from 10 in - version 0.20 to 100 in version 0.22. + .. versionchanged:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. criterion : string, optional (default="gini") The function to measure the quality of a split. Supported criteria are @@ -1530,9 +1530,9 @@ class ExtraTreesRegressor(ForestRegressor): n_estimators : integer, optional (default=10) The number of trees in the forest. - .. versionchanged:: 0.20 - The default value of ``n_estimators`` will change from 10 in - version 0.20 to 100 in version 0.22. + .. versionchanged:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. criterion : string, optional (default="mse") The function to measure the quality of a split. Supported criteria @@ -1749,9 +1749,9 @@ class RandomTreesEmbedding(BaseForest): n_estimators : integer, optional (default=10) Number of trees in the forest. - .. versionchanged:: 0.20 - The default value of ``n_estimators`` will change from 10 in - version 0.20 to 100 in version 0.22. + .. versionchanged:: 0.20 + The default value of ``n_estimators`` will change from 10 in + version 0.20 to 100 in version 0.22. max_depth : integer, optional (default=5) The maximum depth of each tree. If None, then nodes are expanded until diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 4ab818959cb08..6ed0769d005a7 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -187,6 +187,7 @@ def check_regressor_attributes(name): assert_false(hasattr(r, "n_classes_")) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_REGRESSORS) def test_regressor_attributes(name): check_regressor_attributes(name) @@ -433,6 +434,7 @@ def check_oob_score_raise_error(name): bootstrap=False).fit, X, y) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_ESTIMATORS) def test_oob_score_raise_error(name): check_oob_score_raise_error(name) @@ -490,6 +492,7 @@ def check_pickle(name, X, y): assert_equal(score, score2) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_CLASSIFIERS_REGRESSORS) def test_pickle(name): if name in FOREST_CLASSIFIERS: @@ -527,6 +530,7 @@ def check_multioutput(name): assert_equal(log_proba[1].shape, (4, 4)) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_CLASSIFIERS_REGRESSORS) def test_multioutput(name): check_multioutput(name) @@ -550,6 +554,7 @@ def check_classes_shape(name): assert_array_equal(clf.classes_, [[-1, 1], [-2, 2]]) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) def test_classes_shape(name): check_classes_shape(name) @@ -739,6 +744,7 @@ def check_min_samples_split(name): "Failed with {0}".format(name)) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_ESTIMATORS) def test_min_samples_split(name): check_min_samples_split(name) @@ -776,6 +782,7 @@ def check_min_samples_leaf(name): "Failed with {0}".format(name)) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_ESTIMATORS) def test_min_samples_leaf(name): check_min_samples_leaf(name) @@ -843,6 +850,7 @@ def check_sparse_input(name, X, X_sparse, y): dense.fit_transform(X).toarray()) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @pytest.mark.parametrize('sparse_matrix', (csr_matrix, csc_matrix, coo_matrix)) @@ -900,6 +908,7 @@ def check_memory_layout(name, dtype): assert_array_almost_equal(est.fit(X, y).predict(X), y) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_CLASSIFIERS_REGRESSORS) @pytest.mark.parametrize('dtype', (np.float64, np.float32)) def test_memory_layout(name, dtype): @@ -978,6 +987,7 @@ def check_class_weights(name): clf.fit(iris.data, iris.target, sample_weight=sample_weight) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) def test_class_weights(name): check_class_weights(name) @@ -997,6 +1007,7 @@ def check_class_weight_balanced_and_bootstrap_multi_output(name): clf.fit(X, _y) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) def test_class_weight_balanced_and_bootstrap_multi_output(name): check_class_weight_balanced_and_bootstrap_multi_output(name) @@ -1027,6 +1038,7 @@ def check_class_weight_errors(name): assert_raises(ValueError, clf.fit, X, _y) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) def test_class_weight_errors(name): check_class_weight_errors(name) @@ -1164,6 +1176,7 @@ def test_warm_start_oob(name): check_warm_start_oob(name) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_dtype_convert(n_classes=15): classifier = RandomForestClassifier(random_state=0, bootstrap=False) @@ -1202,6 +1215,7 @@ def test_decision_path(name): check_decision_path(name) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_min_impurity_split(): # Test if min_impurity_split of base estimators is set # Regression test for #8006 @@ -1217,6 +1231,7 @@ def test_min_impurity_split(): assert_equal(tree.min_impurity_split, 0.1) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_min_impurity_decrease(): X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) all_estimators = [RandomForestClassifier, RandomForestRegressor, @@ -1235,13 +1250,12 @@ def test_min_impurity_decrease(): [RandomForestClassifier, RandomForestRegressor, ExtraTreesClassifier, ExtraTreesRegressor, RandomTreesEmbedding]) -@pytest.mark.filterwarnings('ignore:default value of n_estimators') def test_nestimators_future_warning(forest): # FIXME: to be removed 0.22 # When n_estimators default value is used - msg_future = "The default value of n_estimators will change from " - "10 in version 0.20 to 100 in 0.22." + msg_future = ("The default value of n_estimators will change from " + "10 in version 0.20 to 100 in 0.22.") est = forest() est = assert_warns_message(FutureWarning, msg_future, est.fit, X, y) diff --git a/sklearn/ensemble/tests/test_voting_classifier.py b/sklearn/ensemble/tests/test_voting_classifier.py index d5a8e055f5d45..5172c4391c633 100644 --- a/sklearn/ensemble/tests/test_voting_classifier.py +++ b/sklearn/ensemble/tests/test_voting_classifier.py @@ -1,6 +1,8 @@ """Testing for the VotingClassifier""" +import pytest import numpy as np + from sklearn.utils.testing import assert_almost_equal, assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_equal, assert_true, assert_false @@ -74,6 +76,7 @@ def test_notfitted(): assert_raise_message(NotFittedError, msg, eclf.predict_proba, X) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_majority_label_iris(): """Check classification by majority label on dataset iris.""" clf1 = LogisticRegression(random_state=123) @@ -86,6 +89,7 @@ def test_majority_label_iris(): assert_almost_equal(scores.mean(), 0.95, decimal=2) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_tie_situation(): """Check voting classifier selects smaller class label in tie situation.""" clf1 = LogisticRegression(random_state=123) @@ -97,6 +101,7 @@ def test_tie_situation(): assert_equal(eclf.fit(X, y).predict(X)[73], 1) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_weights_iris(): """Check classification by average probabilities on dataset iris.""" clf1 = LogisticRegression(random_state=123) @@ -110,6 +115,7 @@ def test_weights_iris(): assert_almost_equal(scores.mean(), 0.93, decimal=2) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_predict_on_toy_problem(): """Manually check predicted class labels for toy dataset.""" clf1 = LogisticRegression(random_state=123) @@ -142,6 +148,7 @@ def test_predict_on_toy_problem(): assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_predict_proba_on_toy_problem(): """Calculate predicted probabilities on toy dataset.""" clf1 = LogisticRegression(random_state=123) @@ -209,6 +216,7 @@ def test_multilabel(): return +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_gridsearch(): """Check GridSearch support.""" clf1 = LogisticRegression(random_state=1) @@ -226,6 +234,7 @@ def test_gridsearch(): grid.fit(iris.data, iris.target) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_parallel_fit(): """Check parallel backend of VotingClassifier on toy dataset.""" clf1 = LogisticRegression(random_state=123) @@ -247,6 +256,7 @@ def test_parallel_fit(): assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_sample_weight(): """Tests sample_weight parameter of VotingClassifier""" clf1 = LogisticRegression(random_state=123) @@ -290,6 +300,7 @@ def fit(self, X, y, *args, **sample_weight): eclf.fit(X, y, sample_weight=np.ones((len(y),))) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_set_params(): """set_params should be able to set estimators""" clf1 = LogisticRegression(random_state=123, C=1.0) @@ -324,6 +335,7 @@ def test_set_params(): eclf1.get_params()["lr"].get_params()['C']) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_set_estimator_none(): """VotingClassifier set_params should be able to set estimators as None""" # Test predict @@ -376,6 +388,7 @@ def test_set_estimator_none(): assert_array_equal(eclf2.transform(X1), np.array([[0], [1]])) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_estimator_weights_format(): # Test estimator weights inputs as list and array clf1 = LogisticRegression(random_state=123) @@ -393,6 +406,7 @@ def test_estimator_weights_format(): assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_transform(): """Check transform method of VotingClassifier on toy dataset.""" clf1 = LogisticRegression(random_state=123) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 4a8a806ed6a62..6b62d87d94d18 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -1,6 +1,8 @@ """Testing for the boost module (sklearn.ensemble.boost).""" +import pytest import numpy as np + from sklearn.utils.testing import assert_array_equal, assert_array_less from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_equal, assert_true, assert_greater @@ -277,6 +279,7 @@ def test_error(): X, y_class, sample_weight=np.asarray([-1])) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_base_estimator(): # Test different base estimators. from sklearn.ensemble import RandomForestClassifier diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index d25abbe6377db..20924d64e79ec 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -340,7 +340,13 @@ def set_checking_parameters(estimator): estimator.set_params(n_resampling=5) if "n_estimators" in params: # especially gradient boosting with default 100 - estimator.set_params(n_estimators=min(5, estimator.n_estimators)) + # FIXME: The default number of trees was changed and is set to 'warn' + # for some of the ensemble methods. We need to catch this case to avoid + # an error during the comparison. To be reverted in 0.22. + if estimator.n_estimators == 'warn': + estimator.set_params(n_estimators=5) + else: + estimator.set_params(n_estimators=min(5, estimator.n_estimators)) if "max_trials" in params: # RANSAC estimator.set_params(max_trials=10) From 0305354c3ab1ba0cc7f40e24f09e2986e34f6749 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 17 Jul 2018 18:27:21 +0200 Subject: [PATCH 11/14] TST avoid further future warning in tests --- sklearn/feature_selection/tests/test_from_model.py | 3 +++ sklearn/feature_selection/tests/test_rfe.py | 2 ++ sklearn/tests/test_calibration.py | 3 ++- sklearn/utils/tests/test_estimator_checks.py | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 6efec43dce37b..b5e965dcad775 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,3 +1,4 @@ +import pytest import numpy as np from sklearn.utils.testing import assert_true @@ -32,6 +33,7 @@ def test_invalid_input(): assert_raises(ValueError, model.transform, data) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_input_estimator_unchanged(): # Test that SelectFromModel fits on a clone of the estimator. est = RandomForestClassifier() @@ -119,6 +121,7 @@ def test_2d_coef(): assert_array_almost_equal(X_new, X[:, feature_mask]) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_partial_fit(): est = PassiveAggressiveClassifier(random_state=0, shuffle=False, max_iter=5, tol=None) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 3cee0fa6f605e..e8533d808dafd 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -1,6 +1,7 @@ """ Testing Recursive feature elimination """ +import pytest import numpy as np from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy import sparse @@ -336,6 +337,7 @@ def test_rfe_cv_n_jobs(): assert_array_almost_equal(rfecv.grid_scores_, rfecv_grid_scores) +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_rfe_cv_groups(): generator = check_random_state(0) iris = load_iris() diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 404c3a797c1db..c99f79bbebd3b 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -2,6 +2,7 @@ # License: BSD 3 clause from __future__ import division +import pytest import numpy as np from scipy import sparse from sklearn.model_selection import LeaveOneOut @@ -24,7 +25,7 @@ from sklearn.calibration import calibration_curve -@ignore_warnings +@pytest.mark.filterwarnings('ignore:The default value of n_estimators') def test_calibration(): """Test calibration objects with isotonic and sigmoid""" n_samples = 100 diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 9b4a9c4f87c16..5ecbd1f14afdd 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1,6 +1,7 @@ import unittest import sys +import pytest import numpy as np import scipy.sparse as sp From a7e7a93eee1ca53848e688bf4fdaebf80f81b6cb Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 17 Jul 2018 18:31:53 +0200 Subject: [PATCH 12/14] MAINT do not show the warning --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 125afe1f535a0..b02383bae3b55 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,7 @@ test = pytest # using yield with parametrize addopts = --doctest-modules - # --disable-pytest-warnings + --disable-pytest-warnings -rs [wheelhouse_uploader] From 68d9168b3270bead1acf82db182794954f84276a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 17 Jul 2018 18:49:19 +0200 Subject: [PATCH 13/14] DOC set the number of estimators in examples --- examples/applications/plot_prediction_latency.py | 2 +- examples/ensemble/plot_ensemble_oob.py | 9 ++++++--- .../plot_random_forest_regression_multioutput.py | 6 ++++-- examples/ensemble/plot_voting_probas.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index 8d4d9c7465939..dfa9d95c6bf62 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -285,7 +285,7 @@ def plot_benchmark_throughput(throughputs, configuration): 'complexity_label': 'non-zero coefficients', 'complexity_computer': lambda clf: np.count_nonzero(clf.coef_)}, {'name': 'RandomForest', - 'instance': RandomForestRegressor(), + 'instance': RandomForestRegressor(n_estimators=100), 'complexity_label': 'estimators', 'complexity_computer': lambda clf: clf.n_estimators}, {'name': 'SVR', diff --git a/examples/ensemble/plot_ensemble_oob.py b/examples/ensemble/plot_ensemble_oob.py index 19b01772d5c24..081025c8170de 100644 --- a/examples/ensemble/plot_ensemble_oob.py +++ b/examples/ensemble/plot_ensemble_oob.py @@ -45,15 +45,18 @@ # error trajectory during training. ensemble_clfs = [ ("RandomForestClassifier, max_features='sqrt'", - RandomForestClassifier(warm_start=True, oob_score=True, + RandomForestClassifier(n_estimators=100, + warm_start=True, oob_score=True, max_features="sqrt", random_state=RANDOM_STATE)), ("RandomForestClassifier, max_features='log2'", - RandomForestClassifier(warm_start=True, max_features='log2', + RandomForestClassifier(n_estimators=100, + warm_start=True, max_features='log2', oob_score=True, random_state=RANDOM_STATE)), ("RandomForestClassifier, max_features=None", - RandomForestClassifier(warm_start=True, max_features=None, + RandomForestClassifier(n_estimators=100, + warm_start=True, max_features=None, oob_score=True, random_state=RANDOM_STATE)) ] diff --git a/examples/ensemble/plot_random_forest_regression_multioutput.py b/examples/ensemble/plot_random_forest_regression_multioutput.py index 44618357cda45..8f5b8ff021af4 100644 --- a/examples/ensemble/plot_random_forest_regression_multioutput.py +++ b/examples/ensemble/plot_random_forest_regression_multioutput.py @@ -44,11 +44,13 @@ random_state=4) max_depth = 30 -regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth, +regr_multirf = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, + max_depth=max_depth, random_state=0)) regr_multirf.fit(X_train, y_train) -regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2) +regr_rf = RandomForestRegressor(n_estimators=100, max_depth=max_depth, + random_state=2) regr_rf.fit(X_train, y_train) # Predict on new data diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py index 7bed271fbf9b9..c729818620a60 100644 --- a/examples/ensemble/plot_voting_probas.py +++ b/examples/ensemble/plot_voting_probas.py @@ -30,7 +30,7 @@ from sklearn.ensemble import VotingClassifier clf1 = LogisticRegression(random_state=123) -clf2 = RandomForestClassifier(random_state=123) +clf2 = RandomForestClassifier(n_estimators=100, random_state=123) clf3 = GaussianNB() X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) y = np.array([1, 1, 2, 2]) From bb1d78628a8723c75f9043dde504ea5ddd382e9a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 17 Jul 2018 18:55:00 +0200 Subject: [PATCH 14/14] cleaning --- sklearn/utils/tests/test_estimator_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 5ecbd1f14afdd..9b4a9c4f87c16 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1,7 +1,6 @@ import unittest import sys -import pytest import numpy as np import scipy.sparse as sp