MNT replace Cython loss functions in SGD part 1 #27999

Merged
18 changes: 9 additions & 9 deletions sklearn/linear_model/_sag_fast.pyx.tp
@@ -85,7 +85,7 @@ cdef {{c_type}} _logsumexp{{name_suffix}}({{c_type}}* arr, int n_classes) noexce
{{for name_suffix, c_type, np_type in dtypes}}

cdef class MultinomialLogLoss{{name_suffix}}:
cdef {{c_type}} _loss(self, {{c_type}}* prediction, {{c_type}} y, int n_classes,
cdef {{c_type}} _loss(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
{{c_type}} sample_weight) noexcept nogil:
r"""Multinomial Logistic regression loss.

@@ -100,12 +100,12 @@ cdef class MultinomialLogLoss{{name_suffix}}:

Parameters
----------
prediction : pointer to a np.ndarray[{{c_type}}] of shape (n_classes,)
Prediction of the multinomial classifier, for current sample.

y : {{c_type}}, between 0 and n_classes - 1
Indice of the correct class for current sample (i.e. label encoded).

prediction : pointer to a np.ndarray[{{c_type}}] of shape (n_classes,)
Prediction of the multinomial classifier, for current sample.

n_classes : integer
Total number of classes.

@@ -129,7 +129,7 @@ cdef class MultinomialLogLoss{{name_suffix}}:
loss = (logsumexp_prediction - prediction[int(y)]) * sample_weight
return loss

cdef void dloss(self, {{c_type}}* prediction, {{c_type}} y, int n_classes,
cdef void dloss(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
{{c_type}} sample_weight, {{c_type}}* gradient_ptr) noexcept nogil:
r"""Multinomial Logistic regression gradient of the loss.

@@ -414,9 +414,9 @@ def sag{{name_suffix}}(

# compute the gradient for this sample, given the prediction
if multinomial:
multiloss.dloss(&prediction[0], y, n_classes, sample_weight, &gradient[0])
multiloss.dloss(y, &prediction[0], n_classes, sample_weight, &gradient[0])
else:
gradient[0] = loss.dloss(prediction[0], y) * sample_weight
gradient[0] = loss.dloss(y, prediction[0]) * sample_weight

# L2 regularization by simply rescaling the weights
wscale *= wscale_update
@@ -835,10 +835,10 @@ def _multinomial_grad_loss_all_samples(
)

# compute the gradient for this sample, given the prediction
multiloss.dloss(&prediction[0], y, n_classes, sample_weight, &gradient[0])
multiloss.dloss(y, &prediction[0], n_classes, sample_weight, &gradient[0])

# compute the loss for this sample, given the prediction
sum_loss += multiloss._loss(&prediction[0], y, n_classes, sample_weight)
sum_loss += multiloss._loss(y, &prediction[0], n_classes, sample_weight)

# update the sum of the gradient
for j in range(xnnz):
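
For readers skimming this hunk, a plain-Python sketch (illustrative only, not part of the patch) of what MultinomialLogLoss computes with the new argument order, where the class index y now precedes the prediction array. The dloss body is collapsed in the diff above, so the gradient shown here is the textbook softmax gradient of this loss:

import numpy as np
from scipy.special import logsumexp

def multinomial_loss(y, prediction, sample_weight=1.0):
    # loss = (logsumexp(prediction) - prediction[int(y)]) * sample_weight,
    # as in MultinomialLogLoss._loss above, with y (the class index) first.
    return (logsumexp(prediction) - prediction[int(y)]) * sample_weight

def multinomial_dloss(y, prediction, sample_weight=1.0):
    # gradient of the loss w.r.t. the prediction: softmax(prediction) - one_hot(y),
    # scaled by sample_weight.
    gradient = np.exp(prediction - logsumexp(prediction))
    gradient[int(y)] -= 1.0
    return gradient * sample_weight

p = np.array([0.2, -0.1, 0.5])
print(multinomial_loss(2, p), multinomial_dloss(2, p))
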
20 changes: 10 additions & 10 deletions sklearn/linear_model/_sgd_fast.pxd
@@ -2,25 +2,25 @@
"""Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx"""

cdef class LossFunction:
cdef double loss(self, double p, double y) noexcept nogil
cdef double dloss(self, double p, double y) noexcept nogil
cdef double loss(self, double y, double p) noexcept nogil
cdef double dloss(self, double y, double p) noexcept nogil


cdef class Regression(LossFunction):
cdef double loss(self, double p, double y) noexcept nogil
cdef double dloss(self, double p, double y) noexcept nogil
cdef double loss(self, double y, double p) noexcept nogil
cdef double dloss(self, double y, double p) noexcept nogil


cdef class Classification(LossFunction):
cdef double loss(self, double p, double y) noexcept nogil
cdef double dloss(self, double p, double y) noexcept nogil
cdef double loss(self, double y, double p) noexcept nogil
cdef double dloss(self, double y, double p) noexcept nogil


cdef class Log(Classification):
cdef double loss(self, double p, double y) noexcept nogil
cdef double dloss(self, double p, double y) noexcept nogil
cdef double loss(self, double y, double p) noexcept nogil
cdef double dloss(self, double y, double p) noexcept nogil


cdef class SquaredLoss(Regression):
cdef double loss(self, double p, double y) noexcept nogil
cdef double dloss(self, double p, double y) noexcept nogil
cdef double loss(self, double y, double p) noexcept nogil
cdef double dloss(self, double y, double p) noexcept nogil
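
As a quick illustration of the convention these declarations now enforce (a sketch, not code from the PR): every loss method takes the target y first and the prediction p second.

class PySquaredLoss:
    # plain-Python mirror of SquaredLoss with the swapped (y, p) order
    def loss(self, y, p):
        return 0.5 * (p - y) * (p - y)

    def dloss(self, y, p):
        # derivative with respect to the prediction p
        return p - y

sq = PySquaredLoss()
assert sq.loss(y=1.0, p=3.0) == 2.0   # 0.5 * (3 - 1)**2
assert sq.dloss(y=1.0, p=3.0) == 2.0  # 3 - 1
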
66 changes: 33 additions & 33 deletions sklearn/linear_model/_sgd_fast.pyx.tp
@@ -77,15 +77,15 @@ cdef extern from *:
cdef class LossFunction:
"""Base class for convex loss functions"""

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
"""Evaluate the loss function.

Parameters
----------
p : double
The prediction, `p = w^T x + intercept`.
y : double
The true value (aka target).
p : double
The prediction, `p = w^T x + intercept`.

Returns
-------
@@ -111,7 +111,7 @@ cdef class LossFunction:
double
The derivative of the loss function with regards to `p`.
"""
return self.dloss(p, y)
return self.dloss(y, p)

def py_loss(self, double p, double y):
"""Python version of `loss` for testing.
@@ -130,18 +130,18 @@ cdef class LossFunction:
double
The loss evaluated at `p` and `y`.
"""
return self.loss(p, y)
return self.loss(y, p)

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
"""Evaluate the derivative of the loss function with respect to
the prediction `p`.

Parameters
----------
p : double
The prediction, `p = w^T x`.
y : double
The true value (aka target).
p : double
The prediction, `p = w^T x`.

Returns
-------
@@ -154,20 +154,20 @@
cdef class Regression(LossFunction):
"""Base class for loss functions for regression"""

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
return 0.

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
return 0.


cdef class Classification(LossFunction):
"""Base class for loss functions for classification"""

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
return 0.

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
return 0.


@@ -179,7 +179,7 @@ cdef class ModifiedHuber(Classification):
See T. Zhang 'Solving Large Scale Linear Prediction Problems Using
Stochastic Gradient Descent', ICML'04.
"""
cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
cdef double z = p * y
if z >= 1.0:
return 0.0
@@ -188,7 +188,7 @@ cdef class ModifiedHuber(Classification):
else:
return -4.0 * z

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
cdef double z = p * y
if z >= 1.0:
return 0.0
@@ -217,13 +217,13 @@ cdef class Hinge(Classification):
def __init__(self, double threshold=1.0):
self.threshold = threshold

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
cdef double z = p * y
if z <= self.threshold:
return self.threshold - z
return 0.0

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
cdef double z = p * y
if z <= self.threshold:
return -y
@@ -249,13 +249,13 @@ cdef class SquaredHinge(Classification):
def __init__(self, double threshold=1.0):
self.threshold = threshold

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
cdef double z = self.threshold - p * y
if z > 0:
return z * z
return 0.0

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
cdef double z = self.threshold - p * y
if z > 0:
return -2 * y * z
@@ -268,7 +268,7 @@ cdef class SquaredHinge(Classification):
cdef class Log(Classification):
"""Logistic regression loss for binary classification with y in {-1, 1}"""

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
cdef double z = p * y
# approximately equal and saves the computation of the log
if z > 18:
@@ -277,7 +277,7 @@ cdef class Log(Classification):
return -z
return log(1.0 + exp(-z))

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
cdef double z = p * y
# approximately equal and saves the computation of the log
if z > 18.0:
@@ -292,10 +292,10 @@ cdef class Log(Classification):

cdef class SquaredLoss(Regression):
"""Squared loss traditional used in linear regression."""
cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
return 0.5 * (p - y) * (p - y)

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
return p - y

def __reduce__(self):
@@ -316,15 +316,15 @@ cdef class Huber(Regression):
def __init__(self, double c):
self.c = c

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
cdef double r = p - y
cdef double abs_r = fabs(r)
if abs_r <= self.c:
return 0.5 * r * r
else:
return self.c * abs_r - (0.5 * self.c * self.c)

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
cdef double r = p - y
cdef double abs_r = fabs(r)
if abs_r <= self.c:
@@ -349,11 +349,11 @@ cdef class EpsilonInsensitive(Regression):
def __init__(self, double epsilon):
self.epsilon = epsilon

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
cdef double ret = fabs(y - p) - self.epsilon
return ret if ret > 0 else 0

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
if y - p > self.epsilon:
return -1
elif p - y > self.epsilon:
@@ -376,11 +376,11 @@ cdef class SquaredEpsilonInsensitive(Regression):
def __init__(self, double epsilon):
self.epsilon = epsilon

cdef double loss(self, double p, double y) noexcept nogil:
cdef double loss(self, double y, double p) noexcept nogil:
cdef double ret = fabs(y - p) - self.epsilon
return ret * ret if ret > 0 else 0

cdef double dloss(self, double p, double y) noexcept nogil:
cdef double dloss(self, double y, double p) noexcept nogil:
cdef double z
z = y - p
if z > self.epsilon:
@@ -569,7 +569,7 @@ def _plain_sgd{{name_suffix}}(
if learning_rate == OPTIMAL:
typw = np.sqrt(1.0 / np.sqrt(alpha))
# computing eta0, the initial learning rate
initial_eta0 = typw / max(1.0, loss.dloss(-typw, 1.0))
initial_eta0 = typw / max(1.0, loss.dloss(1.0, -typw))
# initialize t such that eta at first sample equals eta0
optimal_init = 1.0 / (initial_eta0 * alpha)

@@ -598,7 +598,7 @@ def _plain_sgd{{name_suffix}}(
eta = eta0 / pow(t, power_t)

if verbose or not early_stopping:
sumloss += loss.loss(p, y)
sumloss += loss.loss(y, p)

if y > 0.0:
class_weight = weight_pos
@@ -609,12 +609,12 @@
update = sqnorm(x_data_ptr, x_ind_ptr, xnnz)
if update == 0:
continue
update = min(C, loss.loss(p, y) / update)
update = min(C, loss.loss(y, p) / update)
elif learning_rate == PA2:
update = sqnorm(x_data_ptr, x_ind_ptr, xnnz)
update = loss.loss(p, y) / (update + 0.5 / C)
update = loss.loss(y, p) / (update + 0.5 / C)
else:
dloss = loss.dloss(p, y)
dloss = loss.dloss(y, p)
# clip dloss with large values to avoid numerical
# instabilities
if dloss < -MAX_DLOSS:
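
For context on the hunk at line 569 above (a standalone sketch, not part of the patch): with learning_rate == OPTIMAL the initial step size is seeded from dloss evaluated at a typical weight scale, now called with the target first. Assuming the Hinge loss defined earlier in this file:

import numpy as np

def hinge_dloss(y, p, threshold=1.0):
    # mirror of Hinge.dloss with the new (y, p) order
    z = p * y
    return -y if z <= threshold else 0.0

alpha = 1e-4
typw = np.sqrt(1.0 / np.sqrt(alpha))
initial_eta0 = typw / max(1.0, hinge_dloss(1.0, -typw))
optimal_init = 1.0 / (initial_eta0 * alpha)
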