FIX fixes memory leak seen in PyPy in C losses #27670

Merged (7 commits) on Oct 31, 2023
3 changes: 3 additions & 0 deletions doc/whats_new/v1.4.rst
@@ -35,6 +35,9 @@ random sampling procedures.
solvers (when fit on the same data again). The amount of change depends on the
specified `tol`, for small values you will get more precise results.

- |Fix| fixes a memory leak seen in PyPy for estimators using the Cython loss functions.
:pr:`27670` by :user:`Guillaume Lemaitre <glemaitre>`.

Changes impacting all modules
-----------------------------

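For context on the changelog entry above, a minimal sketch of the calling convention this pull request moves to; the names below (half_squared_loss_kernel, loss) are illustrative only, not the scikit-learn API. The low-level routine fills a caller-provided buffer and returns nothing, so the per-call np.asarray(...) wrapping of output memoryviews, removed from the Cython methods in the diffs below, is no longer needed; only the Python wrapper returns the buffer.

import numpy as np

def half_squared_loss_kernel(y_true, raw_prediction, loss_out):
    # Stand-in for a Cython kernel: write the element-wise loss into the
    # caller-provided buffer in place and return nothing.
    np.multiply(0.5, (y_true - raw_prediction) ** 2, out=loss_out)

def loss(y_true, raw_prediction, loss_out=None):
    # Python-level wrapper: allocate the buffer only if the caller did not
    # pass one, delegate to the kernel, then hand the filled buffer back.
    if loss_out is None:
        loss_out = np.empty_like(raw_prediction)
    half_squared_loss_kernel(y_true, raw_prediction, loss_out)
    return loss_out

y_true = np.array([1.0, 2.0, 3.0])
raw_prediction = np.array([1.5, 2.0, 2.5])
print(loss(y_true, raw_prediction))  # [0.125 0.    0.125]

The wrapper/kernel split mirrors the division between sklearn/_loss/loss.py and the Cython template in the diffs below.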
57 changes: 11 additions & 46 deletions sklearn/_loss/_loss.pyx.tp
@@ -870,7 +870,9 @@ cdef class CyLossFunction:
floating_out[::1] loss_out, # OUT
int n_threads=1
):
"""Compute the pointwise loss value for each input.
"""Compute the point-wise loss value for each input.

The point-wise loss is written to `loss_out` and no array is returned.

Parameters
----------
@@ -884,11 +886,6 @@
A location into which the result is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
loss : array of shape (n_samples,)
Element-wise loss function.
"""
pass

@@ -902,6 +899,8 @@
):
"""Compute gradient of loss w.r.t raw_prediction for each input.

The gradient is written to `gradient_out` and no array is returned.

Parameters
----------
y_true : array of shape (n_samples,)
Expand All @@ -914,11 +913,6 @@ cdef class CyLossFunction:
A location into which the result is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
gradient : array of shape (n_samples,)
Element-wise gradients.
"""
pass

@@ -933,6 +927,9 @@
):
"""Compute loss and gradient of loss w.r.t raw_prediction.

The loss and gradient are written to `loss_out` and `gradient_out` and no arrays
are returned.

Parameters
----------
y_true : array of shape (n_samples,)
@@ -947,18 +944,9 @@
A location into which the gradient is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
loss : array of shape (n_samples,)
Element-wise loss function.

gradient : array of shape (n_samples,)
Element-wise gradients.
"""
self.loss(y_true, raw_prediction, sample_weight, loss_out, n_threads)
self.gradient(y_true, raw_prediction, sample_weight, gradient_out, n_threads)
return np.asarray(loss_out), np.asarray(gradient_out)

def gradient_hessian(
self,
@@ -971,6 +959,9 @@
):
"""Compute gradient and hessian of loss w.r.t raw_prediction.

The gradient and hessian are written to `gradient_out` and `hessian_out` and no
arrays are returned.

Parameters
----------
y_true : array of shape (n_samples,)
@@ -985,14 +976,6 @@
A location into which the hessian is stored.
n_threads : int
Number of threads used by OpenMP (if any).

Returns
-------
gradient : array of shape (n_samples,)
Element-wise gradients.

hessian : array of shape (n_samples,)
Element-wise hessians.
"""
pass

@@ -1045,8 +1028,6 @@ cdef class {{name}}(CyLossFunction):
):
loss_out[i] = sample_weight[i] * {{closs}}(y_true[i], raw_prediction[i]{{with_param}})

return np.asarray(loss_out)

{{if closs_grad is not None}}
def loss_gradient(
self,
@@ -1077,7 +1058,6 @@ cdef class {{name}}(CyLossFunction):
loss_out[i] = sample_weight[i] * dbl2.val1
gradient_out[i] = sample_weight[i] * dbl2.val2

return np.asarray(loss_out), np.asarray(gradient_out)
{{endif}}

def gradient(
@@ -1103,8 +1083,6 @@
):
gradient_out[i] = sample_weight[i] * {{cgrad}}(y_true[i], raw_prediction[i]{{with_param}})

return np.asarray(gradient_out)

def gradient_hessian(
self,
const floating_in[::1] y_true, # IN
@@ -1134,8 +1112,6 @@ cdef class {{name}}(CyLossFunction):
gradient_out[i] = sample_weight[i] * dbl2.val1
hessian_out[i] = sample_weight[i] * dbl2.val2

return np.asarray(gradient_out), np.asarray(hessian_out)

{{endfor}}


@@ -1216,8 +1192,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(loss_out)

def loss_gradient(
self,
const floating_in[::1] y_true, # IN
@@ -1278,8 +1252,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(loss_out), np.asarray(gradient_out)

def gradient(
self,
const floating_in[::1] y_true, # IN
@@ -1327,8 +1299,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(gradient_out)

def gradient_hessian(
self,
const floating_in[::1] y_true, # IN
Expand Down Expand Up @@ -1381,9 +1351,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):

free(p)

return np.asarray(gradient_out), np.asarray(hessian_out)


# This method simplifies the implementation of hessp in linear models,
# i.e. the matrix-vector product of the full hessian, not only of the
# diagonal (in the classes) approximation as implemented above.
@@ -1434,5 +1401,3 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):
gradient_out[i, k] = (proba_out[i, k] - (y_true[i] == k)) * sample_weight[i]

free(p)

return np.asarray(gradient_out), np.asarray(proba_out)
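The docstring changes above all describe the same in-place contract: each Cython method fills its *_out buffer and returns nothing, and the base class composes them (see loss_gradient in the first hunks). A rough, runnable Python stand-in for that structure, assuming a half squared error as the concrete loss; the class names below are hypothetical, not the real Cython types.

import numpy as np

class PointwiseLossSketch:
    # Base class: concrete losses fill the output buffers in place.
    def loss(self, y_true, raw_prediction, sample_weight, loss_out):
        raise NotImplementedError

    def gradient(self, y_true, raw_prediction, sample_weight, gradient_out):
        raise NotImplementedError

    def loss_gradient(self, y_true, raw_prediction, sample_weight, loss_out, gradient_out):
        # Default composition, as in CyLossFunction above: fill both buffers
        # by delegating, and return nothing.
        self.loss(y_true, raw_prediction, sample_weight, loss_out)
        self.gradient(y_true, raw_prediction, sample_weight, gradient_out)

class HalfSquaredErrorSketch(PointwiseLossSketch):
    def loss(self, y_true, raw_prediction, sample_weight, loss_out):
        np.multiply(0.5, (raw_prediction - y_true) ** 2, out=loss_out)
        if sample_weight is not None:
            loss_out *= sample_weight

    def gradient(self, y_true, raw_prediction, sample_weight, gradient_out):
        np.subtract(raw_prediction, y_true, out=gradient_out)
        if sample_weight is not None:
            gradient_out *= sample_weight

y_true = np.array([1.0, 2.0])
raw_prediction = np.array([1.5, 1.0])
loss_out = np.empty_like(raw_prediction)
gradient_out = np.empty_like(raw_prediction)
HalfSquaredErrorSketch().loss_gradient(y_true, raw_prediction, None, loss_out, gradient_out)
print(loss_out, gradient_out)  # loss: [0.125 0.5], gradient: [0.5 -1.0]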
15 changes: 10 additions & 5 deletions sklearn/_loss/loss.py
@@ -189,13 +189,14 @@ def loss(
if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
raw_prediction = raw_prediction.squeeze(1)

return self.closs.loss(
self.closs.loss(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=loss_out,
n_threads=n_threads,
)
return loss_out

def loss_gradient(
self,
@@ -250,14 +251,15 @@ def loss_gradient(
if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
gradient_out = gradient_out.squeeze(1)

return self.closs.loss_gradient(
self.closs.loss_gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
loss_out=loss_out,
gradient_out=gradient_out,
n_threads=n_threads,
)
return loss_out, gradient_out

def gradient(
self,
@@ -299,13 +301,14 @@ def gradient(
if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
gradient_out = gradient_out.squeeze(1)

return self.closs.gradient(
self.closs.gradient(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=gradient_out,
n_threads=n_threads,
)
return gradient_out

def gradient_hessian(
self,
@@ -363,14 +366,15 @@ def gradient_hessian(
if hessian_out.ndim == 2 and hessian_out.shape[1] == 1:
hessian_out = hessian_out.squeeze(1)

return self.closs.gradient_hessian(
self.closs.gradient_hessian(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=gradient_out,
hessian_out=hessian_out,
n_threads=n_threads,
)
return gradient_out, hessian_out

def __call__(self, y_true, raw_prediction, sample_weight=None, n_threads=1):
"""Compute the weighted average loss.
@@ -1075,14 +1079,15 @@ def gradient_proba(
elif proba_out is None:
proba_out = np.empty_like(gradient_out)

return self.closs.gradient_proba(
self.closs.gradient_proba(
y_true=y_true,
raw_prediction=raw_prediction,
sample_weight=sample_weight,
gradient_out=gradient_out,
proba_out=proba_out,
n_threads=n_threads,
)
return gradient_out, proba_out


class ExponentialLoss(BaseLoss):
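With the wrapper changes above, callers of the Python-level API still receive arrays; only the Cython layer stops returning them. A usage sketch against sklearn._loss.loss (a private, internal module, so treat the exact API as illustrative):

import numpy as np
from sklearn._loss.loss import HalfSquaredError  # internal module

loss = HalfSquaredError()
y_true = np.array([1.0, 2.0, 3.0])
raw_prediction = np.array([1.5, 2.0, 2.5])

# Pass a preallocated buffer: the wrapper fills it through the Cython kernel
# and returns it (per the `return loss_out` added above).
loss_out = np.empty_like(raw_prediction)
result = loss.loss(y_true=y_true, raw_prediction=raw_prediction, loss_out=loss_out)
print(result is loss_out, result)  # True [0.125 0.    0.125]

# Omit the buffer and the wrapper allocates one before returning it.
print(loss.loss(y_true=y_true, raw_prediction=raw_prediction))

The same pattern applies to gradient, loss_gradient, gradient_hessian, and gradient_proba, each returning the buffer(s) it filled.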
50 changes: 24 additions & 26 deletions sklearn/_loss/tests/test_loss.py
@@ -383,34 +383,32 @@ def test_loss_same_as_C_functions(loss, sample_weight):
out_g2 = np.empty_like(raw_prediction)
out_h1 = np.empty_like(raw_prediction)
out_h2 = np.empty_like(raw_prediction)
assert_allclose(
    loss.loss(
        y_true=y_true,
        raw_prediction=raw_prediction,
        sample_weight=sample_weight,
        loss_out=out_l1,
    ),
    loss.closs.loss(
        y_true=y_true,
        raw_prediction=raw_prediction,
        sample_weight=sample_weight,
        loss_out=out_l2,
    ),
)
assert_allclose(
    loss.gradient(
        y_true=y_true,
        raw_prediction=raw_prediction,
        sample_weight=sample_weight,
        gradient_out=out_g1,
    ),
    loss.closs.gradient(
        y_true=y_true,
        raw_prediction=raw_prediction,
        sample_weight=sample_weight,
        gradient_out=out_g2,
    ),
)
loss.loss(
    y_true=y_true,
    raw_prediction=raw_prediction,
    sample_weight=sample_weight,
    loss_out=out_l1,
)
loss.closs.loss(
    y_true=y_true,
    raw_prediction=raw_prediction,
    sample_weight=sample_weight,
    loss_out=out_l2,
)
assert_allclose(out_l1, out_l2)
loss.gradient(
    y_true=y_true,
    raw_prediction=raw_prediction,
    sample_weight=sample_weight,
    gradient_out=out_g1,
)
loss.closs.gradient(
    y_true=y_true,
    raw_prediction=raw_prediction,
    sample_weight=sample_weight,
    gradient_out=out_g2,
)
assert_allclose(out_g1, out_g2)
loss.closs.loss_gradient(
y_true=y_true,
raw_prediction=raw_prediction,