From 429982403402369b94466ee0bac251e43c86c723 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Sun, 12 May 2024 14:03:55 +1000 Subject: [PATCH 1/6] add forest warm start --- doc/modules/ensemble.rst | 19 +++++++++++++++++++ sklearn/ensemble/_forest.py | 10 +++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 4237d023973f7..c895b295fbff5 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1232,6 +1232,25 @@ estimation. representations of feature space, also these approaches focus also on dimensionality reduction. +.. _tree_ensemble_warm_start: + +Fitting additional trees +------------------------ + +RandomForest, Extra-Trees and :class:`RandomTreesEmbedding` estimators all support +``warm_start=True`` which allows you to add more trees to an already fitted model. + +:: + + >>> from sklearn.datasets import make_classification + >>> from sklearn.ensemble import RandomForestClassifier + + >>> X, y = make_classification(n_samples=100, random_state=1) + >>> clf = RandomForestClassifier(n_estimators=10) + >>> clf = clf.fit(X, y) # fit with 10 trees + >>> _ = clf.set_params(n_estimators=20, warm_start=True) # set warm_start and increase num of estimators + >>> _ = clf.fit(X, y) # fit additional 10 trees + .. _bagging: Bagging meta-estimator diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 6b1b842f5367b..28c404c3e406b 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -1308,7 +1308,7 @@ class RandomForestClassifier(ForestClassifier): When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`Glossary ` and - :ref:`gradient_boosting_warm_start` for details. + :ref:`tree_ensemble_warm_start` for details. 
class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ default=None @@ -1710,7 +1710,7 @@ class RandomForestRegressor(ForestRegressor): When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`Glossary ` and - :ref:`gradient_boosting_warm_start` for details. + :ref:`tree_ensemble_warm_start` for details. ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. The @@ -2049,7 +2049,7 @@ class ExtraTreesClassifier(ForestClassifier): When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`Glossary ` and - :ref:`gradient_boosting_warm_start` for details. + :ref:`tree_ensemble_warm_start` for details. class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ default=None @@ -2434,7 +2434,7 @@ class ExtraTreesRegressor(ForestRegressor): When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`Glossary ` and - :ref:`gradient_boosting_warm_start` for details. + :ref:`tree_ensemble_warm_start` for details. ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. The @@ -2727,7 +2727,7 @@ class RandomTreesEmbedding(TransformerMixin, BaseForest): When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`Glossary ` and - :ref:`gradient_boosting_warm_start` for details. + :ref:`tree_ensemble_warm_start` for details. 
Attributes ---------- From 332c4a254f9c40903963c88b7a181bc7be8120d0 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Sun, 12 May 2024 15:07:59 +1000 Subject: [PATCH 2/6] add random --- doc/modules/ensemble.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index c895b295fbff5..ee5f9168c2bb6 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1251,6 +1251,19 @@ RandomForest, Extra-Trees and :class:`RandomTreesEmbedding` estimators all suppo >>> _ = clf.set_params(n_estimators=20, warm_start=True) # set warm_start and increase num of estimators >>> _ = clf.fit(X, y) # fit additional 10 trees +When ``random_state`` is also set, the internal random state is also preserved +between ``fit`` calls. This means that training a model once with ``n`` estimators is +the same as building the model iteratively via multiple ``fit`` calls, where the +final number of estimators is equal to ``n``. + +:: + + >>> clf = RandomForestClassifier(n_estimators=20) # set `n_estimators` to 10 + 10 + >>> clf.fit(X, y) # fit `estimators_` will be the same as the classifier above + +Note that this differs from the usual behavior of :term:`random_state` in that it does +*not* result in the same results across different calls. + .. _bagging: Bagging meta-estimator From d4c5f5b0b7b4e24f41f41d469e528d307ab54666 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Sun, 12 May 2024 15:17:16 +1000 Subject: [PATCH 3/6] typo --- doc/modules/ensemble.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index ee5f9168c2bb6..b6e75e3f5b5af 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1262,7 +1262,7 @@ final number of estimators is equal to ``n``. 
>>> clf.fit(X, y) # fit `estimators_` will be the same as the classifier above Note that this differs from the usual behavior of :term:`random_state` in that it does -*not* result in the same results across different calls. +*not* result in the same result across different calls. .. _bagging: From 9964f69c1486a49d424fc6b84a40acf2e0e18f18 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 14 May 2024 16:26:36 +1000 Subject: [PATCH 4/6] review --- doc/modules/ensemble.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index b6e75e3f5b5af..4a19846f88cc6 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1248,8 +1248,13 @@ RandomForest, Extra-Trees and :class:`RandomTreesEmbedding` estimators all suppo >>> X, y = make_classification(n_samples=100, random_state=1) >>> clf = RandomForestClassifier(n_estimators=10) >>> clf = clf.fit(X, y) # fit with 10 trees - >>> _ = clf.set_params(n_estimators=20, warm_start=True) # set warm_start and increase num of estimators + >>> len(clf.estimators_) + 10 + >>> # set warm_start and increase num of estimators + >>> _ = clf.set_params(n_estimators=20, warm_start=True) >>> _ = clf.fit(X, y) # fit additional 10 trees + >>> len(clf.estimators_) + 20 When ``random_state`` is also set, the internal random state is also preserved between ``fit`` calls. This means that training a model once with ``n`` estimators is From f734e79c6e1a9219eccaacff4a7185622b86ed5c Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 14 May 2024 16:30:45 +1000 Subject: [PATCH 5/6] fix --- doc/modules/ensemble.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 4a19846f88cc6..c4da1d41e3f55 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1264,7 +1264,7 @@ final number of estimators is equal to ``n``. 
:: >>> clf = RandomForestClassifier(n_estimators=20) # set `n_estimators` to 10 + 10 - >>> clf.fit(X, y) # fit `estimators_` will be the same as the classifier above + >>> _ = clf.fit(X, y) # fit `estimators_` will be the same as the classifier above Note that this differs from the usual behavior of :term:`random_state` in that it does *not* result in the same result across different calls. From 678e39add1a28a158149a5ce6fe4ebac6caf5e17 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 14 May 2024 16:31:20 +1000 Subject: [PATCH 6/6] wording --- doc/modules/ensemble.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index c4da1d41e3f55..ad2ccd15475f7 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1264,7 +1264,7 @@ final number of estimators is equal to ``n``. :: >>> clf = RandomForestClassifier(n_estimators=20) # set `n_estimators` to 10 + 10 - >>> _ = clf.fit(X, y) # fit `estimators_` will be the same as the classifier above + >>> _ = clf.fit(X, y) # fit; given the same `random_state`, `estimators_` match `clf` above Note that this differs from the usual behavior of :term:`random_state` in that it does *not* result in the same result across different calls.