diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py index ebcf9c15f3103..4dde1174203fb 100644 --- a/examples/ensemble/plot_gradient_boosting_quantile.py +++ b/examples/ensemble/plot_gradient_boosting_quantile.py @@ -62,7 +62,7 @@ def f(x): all_models = {} common_params = dict( learning_rate=0.05, - n_estimators=250, + n_estimators=200, max_depth=2, min_samples_leaf=9, min_samples_split=9, @@ -97,7 +97,7 @@ def f(x): fig = plt.figure(figsize=(10, 10)) plt.plot(xx, f(xx), "g:", linewidth=3, label=r"$f(x) = x\,\sin(x)$") plt.plot(X_test, y_test, "b.", markersize=10, label="Test observations") -plt.plot(xx, y_med, "r-", label="Predicted median", color="orange") +plt.plot(xx, y_med, "r-", label="Predicted median") plt.plot(xx, y_pred, "r-", label="Predicted mean") plt.plot(xx, y_upper, "k-") plt.plot(xx, y_lower, "k-") @@ -224,7 +224,7 @@ def coverage_fraction(y, y_low, y_high): # underfit and could not adapt to sinusoidal shape of the signal. # # The hyper-parameters of the model were approximately hand-tuned for the -# median regressor and there is no reason than the same hyper-parameters are +# median regressor and there is no reason that the same hyper-parameters are # suitable for the 5th percentile regressor. 
# # To confirm this hypothesis, we tune the hyper-parameters of a new regressor @@ -232,17 +232,16 @@ def coverage_fraction(y, y_low, y_high): # cross-validation on the pinball loss with alpha=0.05: # %% -from sklearn.model_selection import RandomizedSearchCV +from sklearn.experimental import enable_halving_search_cv # noqa +from sklearn.model_selection import HalvingRandomSearchCV from sklearn.metrics import make_scorer from pprint import pprint - param_grid = dict( - learning_rate=[0.01, 0.05, 0.1], - n_estimators=[100, 150, 200, 250, 300], - max_depth=[2, 5, 10, 15, 20], - min_samples_leaf=[1, 5, 10, 20, 30, 50], - min_samples_split=[2, 5, 10, 20, 30, 50], + learning_rate=[0.05, 0.1, 0.2], + max_depth=[2, 5, 10], + min_samples_leaf=[1, 5, 10, 20], + min_samples_split=[5, 10, 20, 30, 50], ) alpha = 0.05 neg_mean_pinball_loss_05p_scorer = make_scorer( @@ -251,10 +250,12 @@ def coverage_fraction(y, y_low, y_high): greater_is_better=False, # maximize the negative loss ) gbr = GradientBoostingRegressor(loss="quantile", alpha=alpha, random_state=0) -search_05p = RandomizedSearchCV( +search_05p = HalvingRandomSearchCV( gbr, param_grid, - n_iter=10, # increase this if computational budget allows + resource="n_estimators", + max_resources=250, + min_resources=50, scoring=neg_mean_pinball_loss_05p_scorer, n_jobs=2, random_state=0, @@ -262,9 +263,9 @@ def coverage_fraction(y, y_low, y_high): pprint(search_05p.best_params_) # %% -# We observe that the search procedure identifies that deeper trees are needed -# to get a good fit for the 5th percentile regressor. Deeper trees are more -# expressive and less likely to underfit. +# We observe that the hyper-parameters that were hand-tuned for the median +# regressor are in the same range as the hyper-parameters suitable for the 5th +# percentile regressor. # # Let's now tune the hyper-parameters for the 95th percentile regressor. 
We # need to redefine the `scoring` metric used to select the best model, along @@ -286,15 +287,14 @@ def coverage_fraction(y, y_low, y_high): pprint(search_95p.best_params_) # %% -# This time, shallower trees are selected and lead to a more constant piecewise -# and therefore more robust estimation of the 95th percentile. This is -# beneficial as it avoids overfitting the large outliers of the log-normal -# additive noise. -# -# We can confirm this intuition by displaying the predicted 90% confidence -# interval comprised by the predictions of those two tuned quantile regressors: -# the prediction of the upper 95th percentile has a much coarser shape than the -# prediction of the lower 5th percentile: +# The result shows that the hyper-parameters for the 95th percentile regressor +# identified by the search procedure are roughly in the same range as the hand- +# tuned hyper-parameters for the median regressor and the hyper-parameters +# identified by the search procedure for the 5th percentile regressor. However, +# the hyper-parameter searches did lead to an improved 90% confidence interval +# that consists of the predictions of those two tuned quantile regressors. +# Note that the prediction of the upper 95th percentile has a much coarser shape +# than the prediction of the lower 5th percentile because of the outliers: y_lower = search_05p.predict(xx) y_upper = search_95p.predict(xx)