MNT deprecating presort #14907

Merged · 6 commits · Sep 10, 2019
11 changes: 0 additions & 11 deletions doc/modules/tree.rst
@@ -320,17 +320,6 @@
largest reduction in entropy. This has a cost of
total cost over the entire trees (by summing the cost at each node) of
:math:`O(n_{features}n_{samples}^{2}\log(n_{samples}))`.

Scikit-learn offers a more efficient implementation for the construction of
decision trees. A naive implementation (as above) would recompute the class
label histograms (for classification) or the means (for regression) for each
new split point along a given feature. Presorting the feature over all
relevant samples, and retaining a running label count, will reduce the complexity
at each node to :math:`O(n_{features}\log(n_{samples}))`, which results in a
total cost of :math:`O(n_{features}n_{samples}\log(n_{samples}))`. This is an option
for all tree-based algorithms. By default it is turned on for gradient boosting,
where in general it makes training faster, but turned off for all other algorithms as
it tends to slow down training when training deep trees.
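The presorting strategy described in the paragraph above (which this PR removes) can be sketched with NumPy, mirroring the `np.argsort` call in the code path deleted further down in this diff; the toy array here is illustrative:

```python
import numpy as np

# Toy feature matrix: 4 samples, 2 features.
X = np.array([[3.0, 10.0],
              [1.0, 40.0],
              [2.0, 20.0],
              [4.0, 30.0]])

# Presort each feature column once up front, as the removed code path did
# (Fortran order, so each column's sorted indices are contiguous in memory).
X_idx_sorted = np.asfortranarray(np.argsort(X, axis=0), dtype=np.int32)

# Column 0: visiting samples 1, 2, 0, 3 scans feature 0 in ascending order,
# so candidate split points can be enumerated without re-sorting at each node.
print(X_idx_sorted[:, 0])  # [1 2 0 3]
```

Doing this sort once costs :math:`O(n_{samples}\log(n_{samples}))` per feature, which is what drops the per-node work to a scan over precomputed order.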


Tips on practical use
=====================
12 changes: 12 additions & 0 deletions doc/whats_new/v0.22.rst
@@ -176,6 +176,13 @@ Changelog
`predict_proba` give consistent results.
:pr:`14114` by :user:`Guillaume Lemaitre <glemaitre>`.

- |API| ``presort`` is now deprecated in
:class:`ensemble.GradientBoostingClassifier` and
:class:`ensemble.GradientBoostingRegressor`, and the parameter has no effect.
Users are recommended to use :class:`ensemble.HistGradientBoostingClassifier`
and :class:`ensemble.HistGradientBoostingRegressor` instead.
:pr:`14907` by `Adrin Jalali`_.

:mod:`sklearn.feature_extraction`
.................................

@@ -386,6 +393,11 @@ Changelog
and :class:`ensemble.GradientBoostingRegressor`.
:pr:`12887` by `Thomas Fan`_.

- |API| ``presort`` is now deprecated in
:class:`tree.DecisionTreeClassifier` and
:class:`tree.DecisionTreeRegressor`, and the parameter has no effect.
:pr:`14907` by `Adrin Jalali`_.

:mod:`sklearn.preprocessing`
............................

52 changes: 17 additions & 35 deletions sklearn/ensemble/gradient_boosting.py
@@ -1172,7 +1172,7 @@ def __init__(self, loss, learning_rate, n_estimators, criterion,
max_depth, min_impurity_decrease, min_impurity_split,
init, subsample, max_features, ccp_alpha,
random_state, alpha=0.9, verbose=0, max_leaf_nodes=None,
warm_start=False, presort='auto',
warm_start=False, presort='deprecated',
validation_fraction=0.1, n_iter_no_change=None,
tol=1e-4):

@@ -1234,7 +1234,6 @@ def _fit_stage(self, i, X, y, raw_predictions, sample_weight, sample_mask,
max_features=self.max_features,
max_leaf_nodes=self.max_leaf_nodes,
random_state=random_state,
presort=self.presort,
ccp_alpha=self.ccp_alpha)

if self.subsample < 1.0:
@@ -1334,10 +1333,13 @@ def _check_params(self):
"integer. %r was passed"
% self.n_iter_no_change)

allowed_presort = ('auto', True, False)
if self.presort not in allowed_presort:
raise ValueError("'presort' should be in {}. Got {!r} instead."
.format(allowed_presort, self.presort))
if self.presort != 'deprecated':
warnings.warn("The parameter 'presort' is deprecated and has no "
"effect. It will be removed in v0.24. You can "
"suppress this warning by not passing any value "
"to the 'presort' parameter. We also recommend "
"using HistGradientBoosting models instead.",
DeprecationWarning)
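The sentinel-default pattern added in `_check_params` above (warn only when the caller explicitly passes a value) can be exercised in isolation; this standalone sketch is not scikit-learn code, just a stdlib-only mimic:

```python
import warnings

def check_presort(presort='deprecated'):
    # Mirror of the check above: the string sentinel 'deprecated' means
    # "not passed", so only an explicit value triggers the warning.
    if presort != 'deprecated':
        warnings.warn("The parameter 'presort' is deprecated and has no "
                      "effect. It will be removed in v0.24.",
                      DeprecationWarning)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    check_presort()              # default sentinel: silent
    check_presort(presort=True)  # explicit value: warns once

print(len(caught))  # 1
```

Using a string sentinel rather than `None` lets `None` remain a legal future value and makes `repr` of the default self-describing in the signature.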

def _init_state(self):
"""Initialize model state and allocate model state data structures. """
@@ -1527,20 +1529,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
raw_predictions = self._raw_predict(X)
self._resize_state()

if self.presort is True and issparse(X):
raise ValueError(
"Presorting is not supported for sparse matrices.")

presort = self.presort
# Allow presort to be 'auto', which means True if the dataset is dense,
# otherwise it will be False.
if presort == 'auto':
presort = not issparse(X)

X_idx_sorted = None
if presort:
X_idx_sorted = np.asfortranarray(np.argsort(X, axis=0),
dtype=np.int32)

# fit the boosting stages
n_stages = self._fit_stages(
@@ -1967,14 +1956,10 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
and add more estimators to the ensemble, otherwise, just erase the
previous solution. See :term:`the Glossary <warm_start>`.

presort : bool or 'auto', optional (default='auto')
Whether to presort the data to speed up the finding of best splits in
fitting. Auto mode by default will use presorting on dense data and
default to normal sorting on sparse data. Setting presort to true on
sparse data will raise an error.
presort : deprecated, default='deprecated'
This parameter is deprecated and will be removed in v0.24.

.. versionadded:: 0.17
*presort* parameter.
.. deprecated:: 0.22

validation_fraction : float, optional, default 0.1
The proportion of training data to set aside as validation set for
@@ -2082,7 +2067,7 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100,
min_impurity_split=None, init=None,
random_state=None, max_features=None, verbose=0,
max_leaf_nodes=None, warm_start=False,
presort='auto', validation_fraction=0.1,
presort='deprecated', validation_fraction=0.1,
n_iter_no_change=None, tol=1e-4, ccp_alpha=0.0):

super().__init__(
@@ -2447,14 +2432,10 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
and add more estimators to the ensemble, otherwise, just erase the
previous solution. See :term:`the Glossary <warm_start>`.

presort : bool or 'auto', optional (default='auto')
Whether to presort the data to speed up the finding of best splits in
fitting. Auto mode by default will use presorting on dense data and
default to normal sorting on sparse data. Setting presort to true on
sparse data will raise an error.
presort : deprecated, default='deprecated'
This parameter is deprecated and will be removed in v0.24.

.. versionadded:: 0.17
optional parameter *presort*.
.. deprecated:: 0.22

validation_fraction : float, optional, default 0.1
The proportion of training data to set aside as validation set for
@@ -2548,7 +2529,8 @@ def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100,
max_depth=3, min_impurity_decrease=0.,
min_impurity_split=None, init=None, random_state=None,
max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None,
warm_start=False, presort='auto', validation_fraction=0.1,
warm_start=False, presort='deprecated',
validation_fraction=0.1,
n_iter_no_change=None, tol=1e-4, ccp_alpha=0.0):

super().__init__(