@@ -61,14 +61,14 @@
# .. note::
#
# The fit method of the models used in this notebook all minimize the
-# mean squared error to estimate the conditional mean instead of the mean
-# absolute error that would fit an estimator of the conditional median.
-#
-# When reporting performance measure on the test set in the discussion, we
-# instead choose to focus on the mean absolute error that is more
-# intuitive than the (root) mean squared error. Note, however, that the
-# best models for one metric are also the best for the other in this
-# study.
+# mean squared error to estimate the conditional mean.
+# The absolute error, however, would estimate the conditional median.
+#
+# Nevertheless, when reporting performance measures on the test set in
+# the discussion, we choose to focus on the mean absolute error instead
+# of the (root) mean squared error because it is more intuitive to
+# interpret. Note, however, that in this study the best models for one
+# metric are also the best ones in terms of the other metric.
y = df["count"] / df["count"].max()

# %%
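As a standalone aside illustrating the note above (toy numbers, not from the
dataset used in this example): the constant minimizing the mean squared error
over a skewed sample is its mean, while the constant minimizing the mean
absolute error is its median.

import numpy as np

sample = np.array([1.0, 2.0, 2.0, 3.0, 10.0])  # made-up, skewed toy sample
candidates = np.linspace(0.0, 10.0, 1001)
mse = ((sample[:, None] - candidates[None, :]) ** 2).mean(axis=0)
mae = np.abs(sample[:, None] - candidates[None, :]).mean(axis=0)
print(candidates[mse.argmin()], sample.mean())  # both ~3.6: the sample mean
print(candidates[mae.argmin()], np.median(sample))  # both 2.0: the sample median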
@@ -170,7 +170,10 @@
# efficiently handle heterogeneous tabular data with a mix of categorical and
# numerical features as long as the number of samples is large enough.
#
-# Here, we do minimal ordinal encoding for the categorical variables and then
+# Here, we use the modern
+# :class:`~sklearn.ensemble.HistGradientBoostingRegressor` with native support
+# for categorical features. Therefore, we only do minimal ordinal encoding for
+# the categorical variables and then
# let the model know that it should treat those as categorical variables by
# using a dedicated tree splitting rule. Since we use an ordinal encoder, we
# pass the list of categorical values explicitly to use a logical order when
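The rest of this sentence and the pipeline construction itself are elided from
the diff. As a rough, self-contained sketch of the approach described here (the
column names and encoder settings are assumptions, not the example's actual
code):

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OrdinalEncoder

categorical_columns = ["weather", "season", "holiday", "workingday"]  # assumed
sketch_gbrt = make_pipeline(
    ColumnTransformer(
        transformers=[
            # The example passes an explicit `categories=` list to the encoder
            # to get a logical category order; it is omitted in this sketch.
            ("categorical", OrdinalEncoder(), categorical_columns),
        ],
        remainder="passthrough",
        verbose_feature_names_out=False,
    ),
    HistGradientBoostingRegressor(
        categorical_features=categorical_columns,  # use categorical tree splits
        random_state=42,
    ),
)
# Pandas output preserves column names, so `categorical_features` can refer to
# the encoded columns by name (requires scikit-learn >= 1.2):
sketch_gbrt.set_output(transform="pandas")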
@@ -213,6 +216,9 @@
        verbose_feature_names_out=False,
    ),
    HistGradientBoostingRegressor(
+        max_iter=300,
+        early_stopping=True,
+        validation_fraction=0.1,
        categorical_features=categorical_columns,
        random_state=42,
    ),
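The three added parameters enable early stopping: a fraction
`validation_fraction` of the training data is held out internally, and boosting
stops once the validation loss stops improving, so fewer than `max_iter` trees
may be built. A standalone sketch on synthetic data (dataset and seed are
arbitrary):

from sklearn.datasets import make_regression
from sklearn.ensemble import HistGradientBoostingRegressor

X_toy, y_toy = make_regression(n_samples=5_000, n_features=10, random_state=0)
hgbr = HistGradientBoostingRegressor(
    max_iter=300, early_stopping=True, validation_fraction=0.1, random_state=0
)
hgbr.fit(X_toy, y_toy)
print(hgbr.n_iter_)  # iterations actually run; below 300 if stopped early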
@@ -222,16 +228,26 @@
#
# Let's evaluate our gradient boosting model with the mean absolute error of the
# relative demand averaged across our 5 time-based cross-validation splits:
+import numpy as np


-def evaluate(model, X, y, cv):
+def evaluate(model, X, y, cv, model_prop=None, model_step=None):
    cv_results = cross_validate(
        model,
        X,
        y,
        cv=cv,
        scoring=["neg_mean_absolute_error", "neg_root_mean_squared_error"],
+        return_estimator=model_prop is not None,
    )
+    if model_prop is not None:
+        if model_step is not None:
+            values = [
+                getattr(m[model_step], model_prop) for m in cv_results["estimator"]
+            ]
+        else:
+            values = [getattr(m, model_prop) for m in cv_results["estimator"]]
+        print(f"Mean model.{model_prop} = {np.mean(values)}")
    mae = -cv_results["test_neg_mean_absolute_error"]
    rmse = -cv_results["test_neg_root_mean_squared_error"]
    print(
@@ -240,9 +256,18 @@ def evaluate(model, X, y, cv):
    )


-evaluate(gbrt_pipeline, X, y, cv=ts_cv)
+evaluate(
+    gbrt_pipeline,
+    X,
+    y,
+    cv=ts_cv,
+    model_prop="n_iter_",
+    model_step="histgradientboostingregressor",
+)

# %%
+# The reported `n_iter_` values show that we set `max_iter` high enough for
+# early stopping to take place.
+#
# This model has an average error around 4 to 5% of the maximum demand. This is
# quite good for a first trial without any hyper-parameter tuning! We just had
# to make the categorical variables explicit. Note that the time-related
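The `model_step` lookup in `evaluate` works because `Pipeline` supports
indexing by step name, and `make_pipeline` names each step after its lowercased
class name, hence the `"histgradientboostingregressor"` key above. A tiny
sketch with an arbitrary pipeline:

from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

toy_pipeline = make_pipeline(StandardScaler(), HistGradientBoostingRegressor())
print([name for name, _ in toy_pipeline.steps])
# ['standardscaler', 'histgradientboostingregressor']
# After fitting, `toy_pipeline["histgradientboostingregressor"].n_iter_` holds
# the number of boosting iterations that were actually performed.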
@@ -258,10 +283,8 @@ def evaluate(model, X, y, cv):
#
# As usual for linear models, categorical variables need to be one-hot encoded.
# For consistency, we scale the numerical features to the same 0-1 range using
-# class:`sklearn.preprocessing.MinMaxScaler`, although in this case it does not
+# :class:`~sklearn.preprocessing.MinMaxScaler`, although in this case it does not
# impact the results much because they are already on comparable scales:
-import numpy as np
-
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

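The construction of `naive_linear_pipeline` falls between these hunks. A rough
sketch of the preprocessing described above (the `alphas` grid, column names,
and `handle_unknown` policy are assumptions):

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import RidgeCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

categorical_columns = ["weather", "season", "holiday", "workingday"]  # assumed
sketch_linear = make_pipeline(
    ColumnTransformer(
        transformers=[
            ("categorical", OneHotEncoder(handle_unknown="ignore"), categorical_columns),
        ],
        remainder=MinMaxScaler(),  # scale remaining numerical features to 0-1
    ),
    RidgeCV(alphas=np.logspace(-6, 6, 25)),  # `alpha_` is selected from this grid
)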
@@ -278,10 +301,14 @@ def evaluate(model, X, y, cv):
)


-evaluate(naive_linear_pipeline, X, y, cv=ts_cv)
+evaluate(
+    naive_linear_pipeline, X, y, cv=ts_cv, model_prop="alpha_", model_step="ridgecv"
+)


# %%
+# It is reassuring to see that the selected `alpha_` lies within our specified
+# range.
#
# The performance is not good: the average error is around 14% of the maximum
# demand. This is more than three times higher than the average error of the