-
-
Notifications
You must be signed in to change notification settings - Fork 26k
ENH add Huber loss #25966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH add Huber loss #25966
Changes from all commits
aa10916
db8a49a
ba38c54
62db03e
9715288
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -22,6 +22,7 @@ | |||||||
CyHalfSquaredError, | ||||||||
CyAbsoluteError, | ||||||||
CyPinballLoss, | ||||||||
CyHuberLoss, | ||||||||
CyHalfPoissonLoss, | ||||||||
CyHalfGammaLoss, | ||||||||
CyHalfTweedieLoss, | ||||||||
|
@@ -583,7 +584,7 @@ class PinballLoss(BaseLoss): | |||||||
Additional Attributes | ||||||||
--------------------- | ||||||||
quantile : float | ||||||||
The quantile to be estimated. Must be in range (0, 1). | ||||||||
The quantile level of the quantile to be estimated. Must be in range (0, 1). | ||||||||
""" | ||||||||
|
||||||||
differentiable = False | ||||||||
|
@@ -619,6 +620,79 @@ def fit_intercept_only(self, y_true, sample_weight=None): | |||||||
) | ||||||||
|
||||||||
|
||||||||
class HuberLoss(BaseLoss):
    """Huber loss, for regression.

    Domain:
    y_true and y_pred all real numbers
    quantile in (0, 1)

    Link:
    y_pred = raw_prediction

    For a given sample x_i, the Huber loss is defined as::

        loss(x_i) = 1/2 * abserr**2            if abserr <= delta
                    delta * (abserr - delta/2) if abserr > delta

        abserr = |y_true_i - raw_prediction_i|
        delta = quantile(abserr, self.quantile)

    This class does not evaluate that quantile itself: the ``delta`` passed to
    the constructor is forwarded to the Cython loss and read back as
    ``self.closs.delta``.
    NOTE(review): presumably the calling estimator refreshes ``closs.delta``
    from ``quantile`` during fitting -- confirm against the callers.

    Note: ``quantile`` is validated to lie strictly inside (0, 1), so the
    following are limiting cases only: HuberLoss(quantile=1) equals
    HalfSquaredError and HuberLoss(quantile=0) equals
    delta * (AbsoluteError() - delta/2).

    Additional Attributes
    ---------------------
    quantile : float
        The quantile level which defines the breaking point `delta` to distinguish
        between absolute error and squared error. Must be in range (0, 1).

    References
    ----------
    .. [1] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient
      boosting machine <10.1214/aos/1013203451>`.
      Annals of Statistics, 29, 1189-1232.
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None, quantile=0.9, delta=0.5):
        # `sample_weight` is accepted but not used in this constructor.
        # NOTE(review): presumably kept for signature parity with the other
        # loss classes -- confirm.
        # Raises (via check_scalar) if `quantile` is not a real number strictly
        # between 0 and 1.
        check_scalar(
            quantile,
            "quantile",
            target_type=numbers.Real,
            min_val=0,
            max_val=1,
            include_boundaries="neither",
        )
        self.quantile = quantile  # This is better stored outside of Cython.
        # `delta` is the initial breaking point handed to the Cython loss; it is
        # coerced to float but not otherwise validated here.
        super().__init__(
            closs=CyHuberLoss(delta=float(delta)),
            link=IdentityLink(),
        )
        self.approx_hessian = True
        self.constant_hessian = False

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This is the (weighted) median of the target plus the (weighted)
        average of ``sign(r) * min(delta, |r|)``, where ``r = y_true - median``
        and ``delta = self.closs.delta``. The median is taken over the samples
        axis=0.

        Parameters
        ----------
        y_true : array-like
            Observed target values.
        sample_weight : array-like or None, default=None
            Sample weights. If None, the plain median and the plain mean are
            used.

        Returns
        -------
        raw_prediction
            The median shifted by the average delta-capped residual.
        """
        # See formula before algo 4 in Friedman (2001), but we apply it to y_true,
        # not to the residual y_true - raw_prediction. An estimator like
        # HistGradientBoostingRegressor might then call it on the residual, e.g.
        # fit_intercept_only(y_true - raw_prediction).
        if sample_weight is None:
            median = np.percentile(y_true, 50, axis=0)
        else:
            median = _weighted_percentile(y_true, sample_weight, 50)
        diff = y_true - median
        # Residuals whose magnitude exceeds delta contribute only +/- delta.
        term = np.sign(diff) * np.minimum(self.closs.delta, np.abs(diff))
        # The same weights are used for the median and for this average.
        return median + np.average(term, weights=sample_weight)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this average be weighted? Looking at the current GB losses, they use an unweighted mean: scikit-learn/sklearn/ensemble/_gb_losses.py Lines 490 to 492 in 1834cd6
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that it should be weighted and that the current GB losses have an error. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My argument is mainly based on looking at "Greedy Function Approximation" and mentally adding sample_weights everywhere, i.e. the same as for the other losses. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And an even better argument is the test |
||||||||
|
||||||||
|
||||||||
class HalfPoissonLoss(BaseLoss): | ||||||||
"""Half Poisson deviance loss with log-link, for regression. | ||||||||
|
||||||||
|
@@ -998,6 +1072,7 @@ def gradient_proba( | |||||||
"squared_error": HalfSquaredError, | ||||||||
"absolute_error": AbsoluteError, | ||||||||
"pinball_loss": PinballLoss, | ||||||||
"huber_loss": HuberLoss, | ||||||||
"poisson_loss": HalfPoissonLoss, | ||||||||
"gamma_loss": HalfGammaLoss, | ||||||||
"tweedie_loss": HalfTweedieLoss, | ||||||||
|
Uh oh!
There was an error while loading. Please reload this page.