ENH add Huber loss #25966

Merged · 5 commits · Apr 19, 2023
2 changes: 2 additions & 0 deletions sklearn/_loss/__init__.py
@@ -7,6 +7,7 @@
HalfSquaredError,
AbsoluteError,
PinballLoss,
HuberLoss,
HalfPoissonLoss,
HalfGammaLoss,
HalfTweedieLoss,
@@ -20,6 +21,7 @@
"HalfSquaredError",
"AbsoluteError",
"PinballLoss",
"HuberLoss",
"HalfPoissonLoss",
"HalfGammaLoss",
"HalfTweedieLoss",
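With the export added above, the new loss becomes importable straight from the private `_loss` package. A minimal sketch (illustrative only; `sklearn._loss` is private API and may change without deprecation):

    # Minimal import sketch; not a stable public entry point.
    from sklearn._loss import HuberLoss

    loss = HuberLoss(quantile=0.9, delta=0.5)  # defaults from the __init__ shown below
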
7 changes: 7 additions & 0 deletions sklearn/_loss/_loss.pxd
@@ -44,6 +44,13 @@ cdef class CyPinballLoss(CyLossFunction):
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHuberLoss(CyLossFunction):
cdef public double delta # public makes it accessible from Python
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil


cdef class CyHalfPoissonLoss(CyLossFunction):
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
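As the inline comment in the `.pxd` notes, declaring `delta` as `cdef public` makes the attribute readable from Python; the pure-Python `HuberLoss.fit_intercept_only` further down relies on this via `self.closs.delta`. A hedged sketch of that access pattern:

    # Sketch only: read the public Cython attribute from Python.
    from sklearn._loss._loss import CyHuberLoss

    cy_loss = CyHuberLoss(delta=0.5)
    print(cy_loss.delta)  # 0.5, accessible thanks to `cdef public double delta`
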
56 changes: 56 additions & 0 deletions sklearn/_loss/_loss.pyx.tp
@@ -46,6 +46,18 @@ doc_PinballLoss = (
"""
)

doc_HuberLoss = (
"""Huber Loss with identity link.

Domain:
y_true and y_pred all real numbers
delta in positive real numbers

Link:
y_pred = raw_prediction
"""
)

doc_HalfPoissonLoss = (
"""Half Poisson deviance loss with log-link.

@@ -164,6 +176,9 @@ class_list = [
("CyPinballLoss", doc_PinballLoss, "quantile",
"closs_pinball_loss", None,
"cgradient_pinball_loss", "cgrad_hess_pinball_loss"),
("CyHuberLoss", doc_HuberLoss, "delta",
"closs_huber_loss", None,
"cgradient_huber_loss", "cgrad_hess_huber_loss"),
("CyHalfPoissonLoss", doc_HalfPoissonLoss, None,
"closs_half_poisson", "closs_grad_half_poisson",
"cgradient_half_poisson", "cgrad_hess_half_poisson"),
@@ -359,6 +374,47 @@ cdef inline double_pair cgrad_hess_pinball_loss(
return gh


# Huber Loss
cdef inline double closs_huber_loss(
double y_true,
double raw_prediction,
double delta,
) noexcept nogil:
cdef double abserr = fabs(y_true - raw_prediction)
if abserr <= delta:
return 0.5 * abserr**2
else:
return delta * (abserr - 0.5 * delta)


cdef inline double cgradient_huber_loss(
double y_true,
double raw_prediction,
double delta,
) noexcept nogil:
cdef double res = raw_prediction - y_true
if fabs(res) <= delta:
return res
else:
return delta if res >= 0 else -delta


cdef inline double_pair cgrad_hess_huber_loss(
double y_true,
double raw_prediction,
double delta,
) noexcept nogil:
cdef double_pair gh
gh.val2 = raw_prediction - y_true # used as temporary
if fabs(gh.val2) <= delta:
gh.val1 = gh.val2 # gradient
gh.val2 = 1 # hessian
else:
gh.val1 = delta if gh.val2 >= 0 else -delta # gradient
gh.val2 = 0 # hessian
return gh


# Half Poisson Deviance with Log-Link, dropping constant terms
cdef inline double closs_half_poisson(
double y_true,
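For readers less fluent in Cython, the three kernels above can be transcribed to vectorized NumPy as follows. This is an illustrative sketch, not code from the PR; the function names are made up, but the branch logic mirrors `closs_huber_loss`, `cgradient_huber_loss`, and `cgrad_hess_huber_loss`:

    import numpy as np

    def huber_loss(y_true, raw_prediction, delta):
        # Quadratic for |residual| <= delta, linear beyond.
        abserr = np.abs(y_true - raw_prediction)
        return np.where(
            abserr <= delta, 0.5 * abserr**2, delta * (abserr - 0.5 * delta)
        )

    def huber_gradient(y_true, raw_prediction, delta):
        # The residual, clipped to [-delta, delta]; identical to the
        # if/else in cgradient_huber_loss.
        return np.clip(raw_prediction - y_true, -delta, delta)

    def huber_grad_hess(y_true, raw_prediction, delta):
        # Hessian is 1 on the quadratic branch and 0 on the linear branch,
        # matching gh.val2 in the Cython version.
        res = raw_prediction - y_true
        grad = np.clip(res, -delta, delta)
        hess = (np.abs(res) <= delta).astype(float)
        return grad, hess
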
77 changes: 76 additions & 1 deletion sklearn/_loss/loss.py
@@ -22,6 +22,7 @@
CyHalfSquaredError,
CyAbsoluteError,
CyPinballLoss,
CyHuberLoss,
CyHalfPoissonLoss,
CyHalfGammaLoss,
CyHalfTweedieLoss,
@@ -583,7 +584,7 @@ class PinballLoss(BaseLoss):
Additional Attributes
---------------------
quantile : float
The quantile to be estimated. Must be in range (0, 1).
The quantile level of the quantile to be estimated. Must be in range (0, 1).
"""

differentiable = False
@@ -619,6 +620,79 @@ def fit_intercept_only(self, y_true, sample_weight=None):
)


class HuberLoss(BaseLoss):
"""Huber loss, for regression.

Domain:
y_true and y_pred all real numbers
quantile in (0, 1)

Link:
y_pred = raw_prediction

For a given sample x_i, the Huber loss is defined as::

loss(x_i) = 1/2 * abserr**2 if abserr <= delta
delta * (abserr - delta/2) if abserr > delta

abserr = |y_true_i - raw_prediction_i|
delta = quantile(abserr, self.quantile)

Note: HuberLoss(quantile=1) equals HalfSquaredError and HuberLoss(quantile=0)
equals delta * (AbsoluteError() - delta/2).

Additional Attributes
---------------------
quantile : float
The quantile level which defines the breaking point `delta` to distinguish
between absolute error and squared error. Must be in range (0, 1).

Reference
---------
.. [1] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient
boosting machine <10.1214/aos/1013203451>`.
Annals of Statistics, 29, 1189-1232.
"""

differentiable = False
need_update_leaves_values = True

def __init__(self, sample_weight=None, quantile=0.9, delta=0.5):
check_scalar(
quantile,
"quantile",
target_type=numbers.Real,
min_val=0,
max_val=1,
include_boundaries="neither",
)
self.quantile = quantile # This is better stored outside of Cython.
super().__init__(
closs=CyHuberLoss(delta=float(delta)),
link=IdentityLink(),
)
self.approx_hessian = True
self.constant_hessian = False

def fit_intercept_only(self, y_true, sample_weight=None):
"""Compute raw_prediction of an intercept-only model.

This is the weighted median of the target, i.e. over the samples
axis=0.
"""
# See formula before algo 4 in Friedman (2001), but we apply it to y_true,
# not to the residual y_true - raw_prediction. An estimator like
# HistGradientBoostingRegressor might then call it on the residual, e.g.
# fit_intercept_only(y_true - raw_prediction).
if sample_weight is None:
median = np.percentile(y_true, 50, axis=0)
else:
median = _weighted_percentile(y_true, sample_weight, 50)
diff = y_true - median
term = np.sign(diff) * np.minimum(self.closs.delta, np.abs(diff))
return median + np.average(term, weights=sample_weight)

Member:

Should this average be weighted? Looking at the current GB losses, it uses an unweighted mean:

tree.value[leaf, 0] = median + np.mean(
np.sign(diff_minus_median) * np.minimum(np.abs(diff_minus_median), gamma)
)


@lorentzenchr (Member, Author), Apr 18, 2023:

I think that it should be weighted and that the current GB losses have an error.
Even if that's wrong, we can later decide to call it with sample_weight=None.


@lorentzenchr (Member, Author):

My argument is mainly based on looking at "Greedy Function Approximation" and mentally adding sample_weights everywhere, i.e. the same as for the other losses.


@lorentzenchr (Member, Author), Apr 18, 2023:

And an even better argument is the test test_loss_intercept_only, which kind of proves the statements above.
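
A quick numerical illustration of the point settled above (a sketch, not part of the PR): once sample weights are non-uniform, the weighted and unweighted versions of Friedman's leaf-value update genuinely disagree, which is what lets test_loss_intercept_only discriminate between them:

    import numpy as np

    y_true = np.array([0.0, 1.0, 10.0])
    sample_weight = np.array([1.0, 1.0, 5.0])
    delta = 2.0

    # Weighted median emulated by repetition (stand-in for _weighted_percentile).
    median = np.median(np.repeat(y_true, sample_weight.astype(int)))  # 10.0
    diff = y_true - median
    term = np.sign(diff) * np.minimum(delta, np.abs(diff))

    print(median + np.mean(term))                            # ~8.67 (unweighted)
    print(median + np.average(term, weights=sample_weight))  # ~9.43 (weighted)
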



class HalfPoissonLoss(BaseLoss):
"""Half Poisson deviance loss with log-link, for regression.

@@ -998,6 +1072,7 @@ def gradient_proba(
"squared_error": HalfSquaredError,
"absolute_error": AbsoluteError,
"pinball_loss": PinballLoss,
"huber_loss": HuberLoss,
"poisson_loss": HalfPoissonLoss,
"gamma_loss": HalfGammaLoss,
"tweedie_loss": HalfTweedieLoss,
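To make the intercept computation concrete, here is a small worked sketch of the unweighted path through `fit_intercept_only` (illustrative only; it re-executes the method's body on a toy target and shows how the clipping caps an outlier's influence at `delta`):

    import numpy as np

    y_true = np.array([0.0, 1.0, 2.0, 100.0])
    delta = 1.5  # in HuberLoss this would be self.closs.delta

    median = np.percentile(y_true, 50, axis=0)              # 1.5
    diff = y_true - median                                  # [-1.5, -0.5, 0.5, 98.5]
    term = np.sign(diff) * np.minimum(delta, np.abs(diff))  # [-1.5, -0.5, 0.5, 1.5]
    print(median + np.average(term))                        # 1.5; outlier capped at delta
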
25 changes: 23 additions & 2 deletions sklearn/_loss/tests/test_loss.py
@@ -24,6 +24,7 @@
HalfSquaredError,
HalfTweedieLoss,
HalfTweedieLossIdentity,
HuberLoss,
PinballLoss,
)
from sklearn.utils import assert_all_finite
@@ -36,6 +37,7 @@
# HalfTweedieLoss(power=1.5) is already there as default
LOSS_INSTANCES += [
PinballLoss(quantile=0.25),
HuberLoss(quantile=0.75),
HalfTweedieLoss(power=-1.5),
HalfTweedieLoss(power=0),
HalfTweedieLoss(power=1),
@@ -52,9 +54,11 @@ def loss_instance_name(param):
if isinstance(param, BaseLoss):
loss = param
name = loss.__class__.__name__
if hasattr(loss, "quantile"):
if isinstance(loss, PinballLoss):
name += f"(quantile={loss.closs.quantile})"
elif hasattr(loss, "power"):
elif isinstance(loss, HuberLoss):
name += f"(quantile={loss.quantile}"
elif hasattr(loss, "closs") and hasattr(loss.closs, "power"):
name += f"(power={loss.closs.power})"
return name
else:
@@ -153,6 +157,7 @@ def test_loss_boundary(loss):
(HalfSquaredError(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
(AbsoluteError(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
(PinballLoss(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
(HuberLoss(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
(HalfPoissonLoss(), [0.1, 100], [-np.inf, -3, -0.1, np.inf]),
(HalfGammaLoss(), [0.1, 100], [-np.inf, -3, -0.1, 0, np.inf]),
(HalfTweedieLoss(power=-3), [0.1, 100], [-np.inf, np.inf]),
@@ -173,6 +178,7 @@
Y_TRUE_PARAMS = [ # type: ignore
# (loss, [y success], [y fail])
(HalfPoissonLoss(), [0], []),
(HuberLoss(), [0], []),
(HalfTweedieLoss(power=-3), [-100, -0.1, 0], []),
(HalfTweedieLoss(power=0), [-100, 0], []),
(HalfTweedieLoss(power=1.5), [0], []),
@@ -226,6 +232,8 @@ def test_loss_boundary_y_pred(loss, y_pred_success, y_pred_fail):
(PinballLoss(quantile=0.5), 1.0, 5.0, 2),
(PinballLoss(quantile=0.25), 1.0, 5.0, 4 * (1 - 0.25)),
(PinballLoss(quantile=0.25), 5.0, 1.0, 4 * 0.25),
(HuberLoss(quantile=0.5, delta=3), 1.0, 5.0, 3 * (4 - 3 / 2)),
(HuberLoss(quantile=0.5, delta=3), 1.0, 3.0, 0.5 * 2**2),
(HalfPoissonLoss(), 2.0, np.log(4), 4 - 2 * np.log(4)),
(HalfGammaLoss(), 2.0, np.log(4), np.log(4) + 2 / 4),
(HalfTweedieLoss(power=3), 2.0, np.log(4), -1 / 4 + 1 / 4**2),
@@ -1090,6 +1098,19 @@ def test_init_gradient_and_hessian_raises(loss, params, err_msg):
"quantile == 0, must be > 0.",
),
(PinballLoss, {"quantile": 1.1}, ValueError, "quantile == 1.1, must be < 1."),
(
HuberLoss,
{"quantile": None},
TypeError,
"quantile must be an instance of float, not NoneType.",
),
(
HuberLoss,
{"quantile": 0},
ValueError,
"quantile == 0, must be > 0.",
),
(HuberLoss, {"quantile": 1.1}, ValueError, "quantile == 1.1, must be < 1."),
],
)
def test_loss_init_parameter_validation(loss, params, err_type, err_msg):
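
As a hand check on the two expected Huber values added to the parametrization above: with delta=3, the pair (1.0, 5.0) has abserr = 4 > delta, so the loss is 3 * (4 - 3/2) = 7.5; the pair (1.0, 3.0) has abserr = 2 <= delta, so the loss is 0.5 * 2**2 = 2.0. In code:

    # Arithmetic check of the expected values in the parametrization above.
    delta = 3.0

    abserr = abs(1.0 - 5.0)                     # 4.0 > delta: linear branch
    assert delta * (abserr - delta / 2) == 7.5

    abserr = abs(1.0 - 3.0)                     # 2.0 <= delta: quadratic branch
    assert 0.5 * abserr**2 == 2.0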